From 97518ac124c2e5992f0bd75f71ccacf06cd866a8 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell
Date: Wed, 12 Jun 2024 19:04:03 +0100
Subject: [PATCH] Fix typo bug in gather implementation (#16000)

Pylibcudf calls the datatype accessor type(). Add tests to cover this
case, and raising on out of bounds accesses.

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - Thomas Li (https://github.com/lithomas1)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/16000
---
 python/cudf_polars/cudf_polars/dsl/expr.py    |  2 +-
 .../tests/expressions/test_gather.py          | 31 +++++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py
index 13e496136b5..377a905aed6 100644
--- a/python/cudf_polars/cudf_polars/dsl/expr.py
+++ b/python/cudf_polars/cudf_polars/dsl/expr.py
@@ -801,7 +801,7 @@ def do_evaluate(
             obj = plc.replace.replace_nulls(
                 indices.obj,
                 plc.interop.from_arrow(
-                    pa.scalar(n, type=plc.interop.to_arrow(indices.obj.data_type()))
+                    pa.scalar(n, type=plc.interop.to_arrow(indices.obj.type()))
                 ),
             )
         else:
diff --git a/python/cudf_polars/tests/expressions/test_gather.py b/python/cudf_polars/tests/expressions/test_gather.py
index df33e19a0b6..6bffa3e252c 100644
--- a/python/cudf_polars/tests/expressions/test_gather.py
+++ b/python/cudf_polars/tests/expressions/test_gather.py
@@ -2,8 +2,11 @@
 # SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations
 
+import pytest
+
 import polars as pl
 
+from cudf_polars import execute_with_cudf
 from cudf_polars.testing.asserts import assert_gpu_result_equal
 
 
@@ -17,3 +20,31 @@ def test_gather():
     query = ldf.select(pl.col("a").gather(pl.col("b")))
 
     assert_gpu_result_equal(query)
+
+
+def test_gather_with_nulls():
+    ldf = pl.LazyFrame(
+        {
+            "a": [1, 2, 3, 4, 5, 6, 7],
+            "b": [0, None, 1, None, 6, 1, 0],
+        }
+    )
+
+    query = ldf.select(pl.col("a").gather(pl.col("b")))
+
+    assert_gpu_result_equal(query)
+
+
+@pytest.mark.parametrize("negative", [False, True])
+def test_gather_out_of_bounds(negative):
+    ldf = pl.LazyFrame(
+        {
+            "a": [1, 2, 3, 4, 5, 6, 7],
+            "b": [0, -10 if negative else 10, 1, 2, 6, 1, 0],
+        }
+    )
+
+    query = ldf.select(pl.col("a").gather(pl.col("b")))
+
+    with pytest.raises(pl.exceptions.ComputeError):
+        query.collect(post_opt_callback=execute_with_cudf)