Skip to content

Commit

Permalink
Merge branch 'branch-24.12' into fea-strings-find-re
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Sep 25, 2024
2 parents 99651c1 + ba7d6e7 commit f703265
Show file tree
Hide file tree
Showing 13 changed files with 60 additions and 56 deletions.
11 changes: 9 additions & 2 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@ ARGS=$*
# script, and that this script resides in the repo dir!
REPODIR=$(cd $(dirname $0); pwd)

VALIDARGS="clean libcudf pylibcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n --pydevelop -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats --disable_large_strings"
HELP="$0 [clean] [libcudf] [pylibcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"<args>\\\"]
VALIDARGS="clean libcudf pylibcudf cudf cudf_polars cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n --pydevelop -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats --disable_large_strings"
HELP="$0 [clean] [libcudf] [pylibcudf] [cudf] [cudf_polars] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"<args>\\\"]
clean - remove all existing build artifacts and configuration (start
over)
libcudf - build the cudf C++ code only
pylibcudf - build the pylibcudf Python package
cudf - build the cudf Python package
cudf_polars - build the cudf_polars Python package
cudfjar - build cudf JAR with static libcudf using devtoolset toolchain
dask_cudf - build the dask_cudf Python package
benchmarks - build benchmarks
Expand Down Expand Up @@ -353,6 +354,12 @@ if buildAll || hasArg cudf; then
python ${PYTHON_ARGS_FOR_INSTALL} .
fi

# Build and install the cudf_polars Python package
if buildAll || hasArg cudf_polars; then

cd ${REPODIR}/python/cudf_polars
python ${PYTHON_ARGS_FOR_INSTALL} .
fi

# Build and install the dask_cudf Python package
if buildAll || hasArg dask_cudf; then
Expand Down
2 changes: 1 addition & 1 deletion ci/run_cudf_polars_polars_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ python -m pytest \
-m "" \
-p cudf_polars.testing.plugin \
-v \
--tb=short \
--tb=native \
${DESELECTED_TESTS} \
"$@" \
py-polars/tests
3 changes: 1 addition & 2 deletions ci/test_cudf_polars_polars_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ python -m pip install ./local-pylibcudf-dep/pylibcudf*.whl
rapids-logger "Install cudf_polars"
python -m pip install $(echo ./dist/cudf_polars*.whl)

# TAG=$(python -c 'import polars; print(f"py-{polars.__version__}")')
TAG="py-1.7.0"
TAG=$(python -c 'import polars; print(f"py-{polars.__version__}")')
rapids-logger "Clone polars to ${TAG}"
git clone https://github.com/pola-rs/polars.git --branch ${TAG} --depth 1

Expand Down
5 changes: 1 addition & 4 deletions ci/test_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,14 @@ if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
| tee ./constraints.txt
fi

# echo to expand wildcard before adding `[extra]` requires for pip
# echo to expand wildcard before adding `[test]` requires for pip
python -m pip install \
-v \
--constraint ./constraints.txt \
"$(echo ./dist/cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
"$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)"

rapids-logger "Pin to 1.7.0 Temporarily"
python -m pip install polars==1.7.0

rapids-logger "Run cudf_polars tests"

function set_exitcode()
Expand Down
2 changes: 1 addition & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,7 @@ dependencies:
common:
- output_types: [conda, requirements, pyproject]
packages:
- polars>=1.6
- polars>=1.8,<1.9
run_dask_cudf:
common:
- output_types: [conda, requirements, pyproject]
Expand Down
8 changes: 5 additions & 3 deletions python/cudf_polars/cudf_polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@

from __future__ import annotations

# Check we have a supported polars version
import cudf_polars.utils.versions as v
from cudf_polars._version import __git_commit__, __version__
from cudf_polars.callback import execute_with_cudf
from cudf_polars.dsl.translate import translate_ir

del v
# Check we have a supported polars version
from cudf_polars.utils.versions import _ensure_polars_version

_ensure_polars_version()
del _ensure_polars_version

__all__: list[str] = [
"execute_with_cudf",
Expand Down
8 changes: 0 additions & 8 deletions python/cudf_polars/cudf_polars/dsl/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,6 @@ def _(
cloud_options = None
else:
reader_options, cloud_options = map(json.loads, options)
if (
typ == "csv"
and visitor.version()[0] == 1
and reader_options["schema"] is not None
):
reader_options["schema"] = {
"fields": reader_options["schema"]["inner"]
} # pragma: no cover; CI tests 1.7
file_options = node.file_options
with_columns = file_options.with_columns
n_rows = file_options.n_rows
Expand Down
14 changes: 9 additions & 5 deletions python/cudf_polars/cudf_polars/testing/asserts.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,11 @@ def assert_collect_raises(
cudf-polars.
Useful for controlling optimization settings.
polars_except
Exception or exceptions polars CPU is expected to raise.
Exception or exceptions polars CPU is expected to raise. If
an empty tuple ``()``, CPU is expected to succeed without raising.
cudf_except
Exception or exceptions polars GPU is expected to raise.
Exception or exceptions polars GPU is expected to raise. If
an empty tuple ``()``, GPU is expected to succeed without raising.
collect_kwargs
Common keyword arguments to pass to collect for both polars CPU and
cudf-polars.
Expand Down Expand Up @@ -203,7 +205,8 @@ def assert_collect_raises(
f"CPU execution RAISED {type(e)}, EXPECTED {polars_except}"
) from e
else:
raise AssertionError(f"CPU execution DID NOT RAISE {polars_except}")
if polars_except != ():
raise AssertionError(f"CPU execution DID NOT RAISE {polars_except}")

engine = GPUEngine(raise_on_fail=True)
try:
Expand All @@ -212,7 +215,8 @@ def assert_collect_raises(
pass
except Exception as e:
raise AssertionError(
f"GPU execution RAISED {type(e)}, EXPECTED {polars_except}"
f"GPU execution RAISED {type(e)}, EXPECTED {cudf_except}"
) from e
else:
raise AssertionError(f"GPU execution DID NOT RAISE {polars_except}")
if cudf_except != ():
raise AssertionError(f"GPU execution DID NOT RAISE {cudf_except}")
4 changes: 4 additions & 0 deletions python/cudf_polars/cudf_polars/testing/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,15 @@ def pytest_configure(config: pytest.Config):
"tests/unit/io/test_csv.py::test_read_csv_only_loads_selected_columns": "Memory usage won't be correct due to GPU",
"tests/unit/io/test_lazy_count_star.py::test_count_compressed_csv_18057": "Need to determine if file is compressed",
"tests/unit/io/test_lazy_csv.py::test_scan_csv_slice_offset_zero": "Integer overflow in sliced read",
"tests/unit/io/test_lazy_parquet.py::test_dsl2ir_cached_metadata[False]": "cudf-polars doesn't use metadata read by rust preprocessing",
"tests/unit/io/test_lazy_parquet.py::test_parquet_is_in_statistics": "Debug output on stderr doesn't match",
"tests/unit/io/test_lazy_parquet.py::test_parquet_statistics": "Debug output on stderr doesn't match",
"tests/unit/io/test_lazy_parquet.py::test_parquet_different_schema[False]": "Needs cudf#16394",
"tests/unit/io/test_lazy_parquet.py::test_parquet_schema_mismatch_panic_17067[False]": "Needs cudf#16394",
"tests/unit/io/test_lazy_parquet.py::test_parquet_slice_pushdown_non_zero_offset[False]": "Thrift data not handled correctly/slice pushdown wrong?",
"tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read[False]": "Incomplete handling of projected reads with mismatching schemas, cudf#16394",
"tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read_dtype_mismatch[False]": "Different exception raised, but correctly raises an exception",
"tests/unit/io/test_lazy_parquet.py::test_parquet_unaligned_schema_read_missing_cols_from_first[False]": "Different exception raised, but correctly raises an exception",
"tests/unit/io/test_parquet.py::test_read_parquet_only_loads_selected_columns_15098": "Memory usage won't be correct due to GPU",
"tests/unit/io/test_scan.py::test_scan[single-csv-async]": "Debug output on stderr doesn't match",
"tests/unit/io/test_scan.py::test_scan_with_limit[single-csv-async]": "Debug output on stderr doesn't match",
Expand Down
16 changes: 8 additions & 8 deletions python/cudf_polars/cudf_polars/utils/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@

POLARS_VERSION = parse(__version__)

POLARS_VERSION_GE_16 = POLARS_VERSION >= parse("1.6")
POLARS_VERSION_GT_16 = POLARS_VERSION > parse("1.6")
POLARS_VERSION_LT_16 = POLARS_VERSION < parse("1.6")

if POLARS_VERSION_LT_16:
raise ImportError(
"cudf_polars requires py-polars v1.6 or greater."
) # pragma: no cover
POLARS_VERSION_LT_18 = POLARS_VERSION < parse("1.8")


def _ensure_polars_version():
if POLARS_VERSION_LT_18:
raise ImportError(
"cudf_polars requires py-polars v1.8 or greater."
) # pragma: no cover
2 changes: 1 addition & 1 deletion python/cudf_polars/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ authors = [
license = { text = "Apache 2.0" }
requires-python = ">=3.10"
dependencies = [
"polars>=1.6",
"polars>=1.8,<1.9",
"pylibcudf==24.12.*,>=0.0.0a0",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
classifiers = [
Expand Down
6 changes: 5 additions & 1 deletion python/cudf_polars/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,11 @@ def test_groupby_nan_minmax_raises(op):
"expr",
[
pl.lit(1).alias("value"),
pl.lit([[4, 5, 6]]).alias("value"),
pytest.param(
pl.lit([[4, 5, 6]]).alias("value"),
marks=pytest.mark.xfail(reason="Need to expose OtherScalar in rust IR"),
),
pl.Series("value", [[4, 5, 6]], dtype=pl.List(pl.Int32)),
pl.col("float") * (1 - pl.col("int")),
[pl.lit(2).alias("value"), pl.col("float") * 2],
],
Expand Down
35 changes: 15 additions & 20 deletions python/cudf_polars/tests/testing/test_asserts.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@

import polars as pl

from cudf_polars.containers import DataFrame
from cudf_polars.dsl.ir import Select
from cudf_polars.testing.asserts import (
assert_collect_raises,
assert_gpu_result_equal,
Expand Down Expand Up @@ -38,14 +36,24 @@ class E(Exception):
assert_ir_translation_raises(unsupported, E)


def test_collect_assert_raises(monkeypatch):
def test_collect_assert_raises():
df = pl.LazyFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})

with pytest.raises(AssertionError):
# This should raise, because polars CPU can run this query
with pytest.raises(AssertionError, match="CPU execution DID NOT RAISE"):
# This should raise, because polars CPU can run this query,
# but we expect an error.
assert_collect_raises(
df,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=(),
)

with pytest.raises(AssertionError, match="GPU execution DID NOT RAISE"):
# This should raise, because polars GPU can run this query,
# but we expect an error.
assert_collect_raises(
df,
polars_except=(),
cudf_except=pl.exceptions.InvalidOperationError,
)

Expand All @@ -60,31 +68,18 @@ def test_collect_assert_raises(monkeypatch):
cudf_except=pl.exceptions.InvalidOperationError,
)

with pytest.raises(AssertionError):
with pytest.raises(AssertionError, match="GPU execution RAISED"):
# This should raise because the expected GPU error is wrong
assert_collect_raises(
q,
polars_except=pl.exceptions.InvalidOperationError,
cudf_except=NotImplementedError,
)

with pytest.raises(AssertionError):
with pytest.raises(AssertionError, match="CPU execution RAISED"):
# This should raise because the expected CPU error is wrong
assert_collect_raises(
q,
polars_except=NotImplementedError,
cudf_except=pl.exceptions.InvalidOperationError,
)

with monkeypatch.context() as m:
m.setattr(Select, "evaluate", lambda self, cache: DataFrame([]))
# This query should fail, but we monkeypatch a bad
# implementation of Select which "succeeds" to check that our
# assertion notices this case.
q = df.select(pl.col("a") + pl.Series([1, 2]))
with pytest.raises(AssertionError):
assert_collect_raises(
q,
polars_except=pl.exceptions.ComputeError,
cudf_except=pl.exceptions.ComputeError,
)

0 comments on commit f703265

Please sign in to comment.