Skip to content

Commit

Permalink
leftover usage
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke committed Nov 20, 2024
1 parent 02b46bd commit d32297d
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 8 deletions.
22 changes: 19 additions & 3 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import numpy as np
import pandas as pd

import pylibcudf as plc

import cudf
from cudf import _lib as libcudf
from cudf._lib import groupby as libgroupby
Expand All @@ -25,6 +27,7 @@
from cudf.api.types import is_list_like, is_numeric_dtype
from cudf.core._compat import PANDAS_LT_300
from cudf.core.abc import Serializable
from cudf.core.buffer import acquire_spill_lock
from cudf.core.column.column import ColumnBase, StructDtype, as_column
from cudf.core.column_accessor import ColumnAccessor
from cudf.core.copy_types import GatherMap
Expand Down Expand Up @@ -770,9 +773,22 @@ def agg(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs):
join_keys = map(list, zip(*join_keys))
# By construction, left and right keys are related by
# a permutation, so we can use an inner join.
left_order, right_order = libcudf.join.join(
*join_keys, how="inner"
)
with acquire_spill_lock():
plc_tables = [
plc.Table(
[col.to_pylibcudf(mode="read") for col in cols]
)
for cols in join_keys
]
left_plc, right_plc = plc.join.inner_join(
plc_tables[0],
plc_tables[1],
plc.types.NullEquality.EQUAL,
)
left_order = libcudf.column.Column.from_pylibcudf(left_plc)
right_order = libcudf.column.Column.from_pylibcudf(
right_plc
)
# left order is some permutation of the ordering we
# want, and right order is a matching gather map for
# the result table. Get the correct order by sorting
Expand Down
12 changes: 11 additions & 1 deletion python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
import pyarrow as pa
from typing_extensions import Self

import pylibcudf as plc

import cudf
from cudf import _lib as libcudf
from cudf._lib.filling import sequence
Expand All @@ -32,6 +34,7 @@
)
from cudf.core._base_index import BaseIndex, _return_get_indexer_result
from cudf.core._compat import PANDAS_LT_300
from cudf.core.buffer import acquire_spill_lock
from cudf.core.column import (
CategoricalColumn,
ColumnBase,
Expand Down Expand Up @@ -1360,7 +1363,14 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
except ValueError:
return _return_get_indexer_result(result.values)

scatter_map, indices = libcudf.join.join([lcol], [rcol], how="inner")
with acquire_spill_lock():
left_plc, right_plc = plc.join.inner_join(
plc.Table([lcol.to_pylibcudf(mode="read")]),
plc.Table([rcol.to_pylibcudf(mode="read")]),
plc.types.NullEquality.EQUAL,
)
scatter_map = libcudf.column.Column.from_pylibcudf(left_plc)
indices = libcudf.column.Column.from_pylibcudf(right_plc)
result = libcudf.copying.scatter([indices], scatter_map, [result])[0]
result_series = cudf.Series._from_column(result)

Expand Down
19 changes: 15 additions & 4 deletions python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import numpy as np
import pandas as pd

import pylibcudf as plc

import cudf
import cudf._lib as libcudf
from cudf._lib.types import size_type_dtype
Expand All @@ -22,6 +24,7 @@
from cudf.core import column
from cudf.core._base_index import _return_get_indexer_result
from cudf.core.algorithms import factorize
from cudf.core.buffer import acquire_spill_lock
from cudf.core.column_accessor import ColumnAccessor
from cudf.core.frame import Frame
from cudf.core.index import (
Expand Down Expand Up @@ -1919,10 +1922,18 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
for lcol, rcol in zip(target._columns, self._columns)
]
join_keys = map(list, zip(*join_keys))
scatter_map, indices = libcudf.join.join(
*join_keys,
how="inner",
)
with acquire_spill_lock():
plc_tables = [
plc.Table([col.to_pylibcudf(mode="read") for col in cols])
for cols in join_keys
]
left_plc, right_plc = plc.join.inner_join(
plc_tables[0],
plc_tables[1],
plc.types.NullEquality.EQUAL,
)
scatter_map = libcudf.column.Column.from_pylibcudf(left_plc)
indices = libcudf.column.Column.from_pylibcudf(right_plc)
result = libcudf.copying.scatter([indices], scatter_map, [result])[0]
result_series = cudf.Series._from_column(result)

Expand Down

0 comments on commit d32297d

Please sign in to comment.