Skip to content

Commit

Permalink
Merge branch 'branch-24.12' into misc/ci-cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
jameslamb authored Sep 25, 2024
2 parents aecd465 + 8378379 commit 6ea3dd9
Show file tree
Hide file tree
Showing 23 changed files with 363 additions and 80 deletions.
5 changes: 0 additions & 5 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -239,11 +239,6 @@ if hasArg --pydevelop; then
PYTHON_ARGS_FOR_INSTALL="${PYTHON_ARGS_FOR_INSTALL} -e"
fi

# Default to `-DFIND_CUDF_CPP=ON`: the flag is appended to EXTRA_CMAKE_ARGS
# only when the user has not already mentioned FIND_CUDF_CPP there.
case "${EXTRA_CMAKE_ARGS}" in
  *"DFIND_CUDF_CPP"*)
    # An explicit user setting is present; leave it alone.
    ;;
  *)
    EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DFIND_CUDF_CPP=ON"
    ;;
esac

# Switch BUILD_DISABLE_LARGE_STRINGS to ON when --disable_large_strings
# was passed on the command line.
if hasArg --disable_large_strings
then
  BUILD_DISABLE_LARGE_STRINGS=ON
fi
Expand Down
4 changes: 2 additions & 2 deletions ci/cudf_pandas_scripts/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ else

echo "" > ./constraints.txt
if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
# `test_python` constraints are for `[test]` not `[cudf-pandas-tests]`
# `test_python_cudf_pandas` constraints are for `[test]` not `[cudf-pandas-tests]`
rapids-dependency-file-generator \
--output requirements \
--file-key test_python \
--file-key test_python_cudf_pandas \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
| tee ./constraints.txt
fi
Expand Down
6 changes: 3 additions & 3 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ NEXT_PATCH=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[3]}')
NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR}

# Need to PEP 440-normalize the versions for some use cases
CURRENT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${CURRENT_SHORT_TAG}'))")
NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))")
PATCH_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_PATCH}'))")
CURRENT_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${CURRENT_SHORT_TAG}'))")
NEXT_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_SHORT_TAG}'))")
PATCH_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_PATCH}'))")

echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG"

Expand Down
4 changes: 2 additions & 2 deletions ci/test_python_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ set -euo pipefail
rapids-logger "Generate Python testing dependencies"

ENV_YAML_DIR="$(mktemp -d)"

FILE_KEY=$1
rapids-dependency-file-generator \
--output conda \
--file-key test_python \
--file-key ${FILE_KEY} \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
| tee "${ENV_YAML_DIR}/env.yaml"

Expand Down
2 changes: 1 addition & 1 deletion ci/test_python_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../;

# Common setup steps shared by Python test jobs
source ./ci/test_python_common.sh
source ./ci/test_python_common.sh test_python_cudf

rapids-logger "Check GPU usage"
nvidia-smi
Expand Down
2 changes: 1 addition & 1 deletion ci/test_python_other.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../

# Common setup steps shared by Python test jobs
source ./ci/test_python_common.sh
source ./ci/test_python_common.sh test_python_other

rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
Expand Down
36 changes: 32 additions & 4 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,28 @@ files:
includes:
- cuda_version
- test_cpp
test_python:
test_python_cudf_pandas:
output: none
includes:
- cuda_version
- py_version
- test_python_common
- test_python_cudf
- test_python_dask_cudf
- test_python_cudf_pandas
test_python_cudf:
output: none
includes:
- cuda_version
- py_version
- test_python_common
- test_python_cudf
test_python_other:
output: none
includes:
- cuda_version
- py_version
- test_python_common
- test_python_dask_cudf
test_java:
output: none
includes:
Expand Down Expand Up @@ -707,9 +720,7 @@ dependencies:
- matrix: {dependencies: "oldest"}
packages:
- numba==0.57.*
- numpy==1.23.*
- pandas==2.0.*
- pyarrow==14.0.0
- matrix:
packages:
- output_types: conda
Expand Down Expand Up @@ -764,6 +775,14 @@ dependencies:
- &transformers transformers==4.39.3
- tzdata
specific:
- output_types: [conda, requirements]
matrices:
- matrix: {dependencies: "oldest"}
packages:
- numpy==1.23.*
- pyarrow==14.0.0
- matrix:
packages:
- output_types: conda
matrices:
- matrix:
Expand All @@ -783,6 +802,15 @@ dependencies:
packages:
- dask-cuda==24.12.*,>=0.0.0a0
- *numba
specific:
- output_types: [conda, requirements]
matrices:
- matrix: {dependencies: "oldest"}
packages:
- numpy==1.24.*
- pyarrow==14.0.1
- matrix:
packages:
depends_on_libcudf:
common:
- output_types: conda
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
=======
findall
=======

.. automodule:: pylibcudf.strings.findall
:members:
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ strings
contains
extract
find
findall
regex_flags
regex_program
repeat
Expand Down
35 changes: 10 additions & 25 deletions python/cudf/cudf/_lib/strings/findall.pyx
Original file line number Diff line number Diff line change
@@ -1,40 +1,25 @@
# Copyright (c) 2019-2024, NVIDIA CORPORATION.

from cython.operator cimport dereference
from libc.stdint cimport uint32_t
from libcpp.memory cimport unique_ptr
from libcpp.string cimport string
from libcpp.utility cimport move

from cudf.core.buffer import acquire_spill_lock

from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.strings.findall cimport findall as cpp_findall
from pylibcudf.libcudf.strings.regex_flags cimport regex_flags
from pylibcudf.libcudf.strings.regex_program cimport regex_program

from cudf._lib.column cimport Column

import pylibcudf as plc


@acquire_spill_lock()
def findall(Column source_strings, object pattern, uint32_t flags):
"""
Returns data with all non-overlapping matches of `pattern`
in each string of `source_strings` as a lists column.
"""
cdef unique_ptr[column] c_result
cdef column_view source_view = source_strings.view()

cdef string pattern_string = <string>str(pattern).encode()
cdef regex_flags c_flags = <regex_flags>flags
cdef unique_ptr[regex_program] c_prog

with nogil:
c_prog = move(regex_program.create(pattern_string, c_flags))
c_result = move(cpp_findall(
source_view,
dereference(c_prog)
))

return Column.from_unique_ptr(move(c_result))
prog = plc.strings.regex_program.RegexProgram.create(
str(pattern), flags
)
plc_result = plc.strings.findall.findall(
source_strings.to_pylibcudf(mode="read"),
prog,
)
return Column.from_pylibcudf(plc_result)
10 changes: 10 additions & 0 deletions python/cudf/cudf/pandas/fast_slow_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -881,6 +881,12 @@ def _assert_fast_slow_eq(left, right):
assert_eq(left, right)


class ProxyFallbackError(Exception):
    """Raised when fallback occurs.

    ``_fast_slow_function_call`` raises this, chained from the original
    cuDF exception, when the ``CUDF_PANDAS_FAIL_ON_FALLBACK`` environment
    flag is enabled.
    """


def _fast_function_call():
"""
Placeholder fast function for pytest profiling purposes.
Expand Down Expand Up @@ -957,6 +963,10 @@ def _fast_slow_function_call(
f"The exception was {e}."
)
except Exception as err:
if _env_get_bool("CUDF_PANDAS_FAIL_ON_FALLBACK", False):
raise ProxyFallbackError(
f"The operation failed with cuDF, the reason was {type(err)}: {err}."
) from err
with nvtx.annotate(
"EXECUTE_SLOW",
color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"],
Expand Down
16 changes: 15 additions & 1 deletion python/cudf/cudf_pandas_tests/test_cudf_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@

from cudf.core._compat import PANDAS_GE_220
from cudf.pandas import LOADED, Profiler
from cudf.pandas.fast_slow_proxy import _Unusable, is_proxy_object
from cudf.pandas.fast_slow_proxy import (
ProxyFallbackError,
_Unusable,
is_proxy_object,
)
from cudf.testing import assert_eq

if not LOADED:
Expand Down Expand Up @@ -1738,3 +1742,13 @@ def add_one_ufunc(a):
return a + 1

assert_eq(cp.asarray(add_one_ufunc(arr1)), cp.asarray(add_one_ufunc(arr2)))


# NOTE(review): the test is marked xfail but also asserts the error through
# pytest.raises; when ProxyFallbackError is raised the body succeeds, which
# presumably reports as XPASS rather than PASS -- confirm the marker is wanted.
@pytest.mark.xfail(
    reason="Fallback expected because casting to object is not supported",
)
def test_fallback_raises_error(monkeypatch):
    """Casting to ``object`` with CUDF_PANDAS_FAIL_ON_FALLBACK set should raise."""
    with monkeypatch.context() as scoped_patch:
        # The flag is set only inside this context and undone on exit.
        scoped_patch.setenv("CUDF_PANDAS_FAIL_ON_FALLBACK", "True")
        int_series = pd.Series(range(2))
        with pytest.raises(ProxyFallbackError):
            int_series.astype(object)
100 changes: 100 additions & 0 deletions python/cudf/cudf_pandas_tests/test_cudf_pandas_no_fallback.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import pytest

from cudf.pandas import LOADED

if not LOADED:
raise ImportError("These tests must be run with cudf.pandas loaded")

import numpy as np
import pandas as pd


@pytest.fixture(autouse=True)
def fail_on_fallback(monkeypatch):
    """Autouse fixture: set CUDF_PANDAS_FAIL_ON_FALLBACK to "True" for each test."""
    monkeypatch.setenv(name="CUDF_PANDAS_FAIL_ON_FALLBACK", value="True")


@pytest.fixture
def dataframe():
    """Five-row frame with two integer columns ("a", "b") and a float column ("c")."""
    columns = {
        "a": [1, 1, 1, 2, 3],
        "b": [1, 2, 3, 4, 5],
        "c": [1.2, 1.3, 1.5, 1.7, 1.11],
    }
    return pd.DataFrame(columns)


@pytest.fixture
def series(dataframe):
    """Column "a" of the ``dataframe`` fixture."""
    column_a = dataframe["a"]
    return column_a


@pytest.fixture
def array(series):
    """The ``.values`` of the ``series`` fixture."""
    values = series.values
    return values


@pytest.mark.parametrize(
    "op",
    ["sum", "min", "max", "mean", "std", "var", "prod", "median"],
)
def test_no_fallback_in_reduction_ops(series, op):
    """Call the reduction named ``op`` on the series; the result is not inspected."""
    reduction = getattr(series, op)
    reduction()


def test_groupby(dataframe):
df = dataframe
df.groupby("a", sort=True).max()


def test_no_fallback_in_binops(dataframe):
df = dataframe
df + df
df - df
df * df
df**df
df[["a", "b"]] & df[["a", "b"]]
df <= df


def test_no_fallback_in_groupby_rolling_sum(dataframe):
df = dataframe
df.groupby("a").rolling(2).sum()


def test_no_fallback_in_concat(dataframe):
df = dataframe
pd.concat([df, df])


def test_no_fallback_in_get_shape(dataframe):
df = dataframe
df.shape


def test_no_fallback_in_array_ufunc_op(array):
np.add(array, array)


def test_no_fallback_in_merge(dataframe):
df = dataframe
pd.merge(df * df, df + df, how="inner")
pd.merge(df * df, df + df, how="outer")
pd.merge(df * df, df + df, how="left")
pd.merge(df * df, df + df, how="right")
Loading

0 comments on commit 6ea3dd9

Please sign in to comment.