Skip to content

Commit

Permalink
Merge branch 'mukernels_fixedwidth_optimize' of https://github.com/pm…
Browse files Browse the repository at this point in the history
…attione-nvidia/cudf into mukernels_fixedwidth_optimize
  • Loading branch information
pmattione-nvidia committed Oct 4, 2024
2 parents 4471022 + c0ed2cb commit c2139ef
Show file tree
Hide file tree
Showing 126 changed files with 4,049 additions and 1,002 deletions.
128 changes: 57 additions & 71 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,66 +43,52 @@ jobs:
with:
needs: ${{ toJSON(needs) }}
changed-files:
runs-on: ubuntu-latest
name: "Check changed files"
outputs:
test_cpp: ${{ steps.changed-files.outputs.cpp_any_changed == 'true' }}
test_java: ${{ steps.changed-files.outputs.java_any_changed == 'true' }}
test_notebooks: ${{ steps.changed-files.outputs.notebooks_any_changed == 'true' }}
test_python: ${{ steps.changed-files.outputs.python_any_changed == 'true' }}
steps:
- name: Get PR info
id: get-pr-info
uses: nv-gha-runners/get-pr-info@main
- name: Checkout code repo
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Calculate merge base
id: calculate-merge-base
env:
PR_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }}
run: |
(echo -n "merge-base="; git merge-base "$BASE_SHA" "$PR_SHA") > "$GITHUB_OUTPUT"
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@v45
with:
base_sha: ${{ steps.calculate-merge-base.outputs.merge-base }}
sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
files_yaml: |
cpp:
- '**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!docs/**'
- '!img/**'
- '!java/**'
- '!notebooks/**'
- '!python/**'
java:
- '**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!docs/**'
- '!img/**'
- '!notebooks/**'
- '!python/**'
notebooks:
- '**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!java/**'
python:
- '**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!docs/**'
- '!img/**'
- '!java/**'
- '!notebooks/**'
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
files_yaml: |
test_cpp:
- '**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!ci/cudf_pandas_scripts/**'
- '!docs/**'
- '!img/**'
- '!java/**'
- '!notebooks/**'
- '!python/**'
test_cudf_pandas:
- '**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!docs/**'
- '!img/**'
- '!java/**'
- '!notebooks/**'
test_java:
- '**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!ci/cudf_pandas_scripts/**'
- '!docs/**'
- '!img/**'
- '!notebooks/**'
- '!python/**'
test_notebooks:
- '**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!ci/cudf_pandas_scripts/**'
- '!java/**'
test_python:
- '**'
- '!CONTRIBUTING.md'
- '!README.md'
- '!ci/cudf_pandas_scripts/**'
- '!docs/**'
- '!img/**'
- '!java/**'
- '!notebooks/**'
checks:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
Expand All @@ -125,7 +111,7 @@ jobs:
needs: [conda-cpp-build, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_cpp == 'true'
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp
with:
build_type: pull-request
conda-python-build:
Expand All @@ -138,7 +124,7 @@ jobs:
needs: [conda-python-build, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_python == 'true'
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
with:
build_type: pull-request
script: "ci/test_python_cudf.sh"
Expand All @@ -147,15 +133,15 @@ jobs:
needs: [conda-python-build, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_python == 'true'
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
with:
build_type: pull-request
script: "ci/test_python_other.sh"
conda-java-tests:
needs: [conda-cpp-build, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_java == 'true'
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_java
with:
build_type: pull-request
node_type: "gpu-v100-latest-1"
Expand All @@ -176,7 +162,7 @@ jobs:
needs: [conda-python-build, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_notebooks == 'true'
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_notebooks
with:
build_type: pull-request
node_type: "gpu-v100-latest-1"
Expand Down Expand Up @@ -220,7 +206,7 @@ jobs:
needs: [wheel-build-cudf, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_python == 'true'
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
with:
build_type: pull-request
script: ci/test_wheel_cudf.sh
Expand All @@ -237,7 +223,7 @@ jobs:
needs: [wheel-build-cudf-polars, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_python == 'true'
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
Expand All @@ -248,7 +234,7 @@ jobs:
cudf-polars-polars-tests:
needs: wheel-build-cudf-polars
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
Expand All @@ -269,7 +255,7 @@ jobs:
needs: [wheel-build-dask-cudf, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_python == 'true'
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
Expand All @@ -289,7 +275,7 @@ jobs:
needs: [wheel-build-cudf, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_python == 'true'
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python || fromJSON(needs.changed-files.outputs.changed_file_groups).test_cudf_pandas
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
Expand All @@ -300,7 +286,7 @@ jobs:
needs: [wheel-build-cudf, changed-files]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: needs.changed-files.outputs.test_python == 'true'
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python || fromJSON(needs.changed-files.outputs.changed_file_groups).test_cudf_pandas
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
Expand Down
11 changes: 9 additions & 2 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@ ARGS=$*
# script, and that this script resides in the repo dir!
REPODIR=$(cd $(dirname $0); pwd)

VALIDARGS="clean libcudf pylibcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n --pydevelop -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats --disable_large_strings"
HELP="$0 [clean] [libcudf] [pylibcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"<args>\\\"]
VALIDARGS="clean libcudf pylibcudf cudf cudf_polars cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n --pydevelop -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats --disable_large_strings"
HELP="$0 [clean] [libcudf] [pylibcudf] [cudf] [cudf_polars] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"<args>\\\"]
clean - remove all existing build artifacts and configuration (start
over)
libcudf - build the cudf C++ code only
pylibcudf - build the pylibcudf Python package
cudf - build the cudf Python package
cudf_polars - build the cudf_polars Python package
cudfjar - build cudf JAR with static libcudf using devtoolset toolchain
dask_cudf - build the dask_cudf Python package
benchmarks - build benchmarks
Expand Down Expand Up @@ -353,6 +354,12 @@ if buildAll || hasArg cudf; then
python ${PYTHON_ARGS_FOR_INSTALL} .
fi

# Build and install the cudf_polars Python package
if buildAll || hasArg cudf_polars; then

cd ${REPODIR}/python/cudf_polars
python ${PYTHON_ARGS_FOR_INSTALL} .
fi

# Build and install the dask_cudf Python package
if buildAll || hasArg dask_cudf; then
Expand Down
2 changes: 1 addition & 1 deletion ci/build_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ rapids-generate-version > ./VERSION

cd "${package_dir}"

python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check
python -m pip wheel . -w dist -v --no-deps --disable-pip-version-check
64 changes: 50 additions & 14 deletions ci/cudf_pandas_scripts/pandas-tests/job-summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,20 +67,33 @@ def emoji_failed(x):
# convert pr_results to a pandas DataFrame and then a markdown table
pr_df = pd.DataFrame.from_dict(pr_results, orient="index").sort_index()
main_df = pd.DataFrame.from_dict(main_results, orient="index").sort_index()
diff_df = pr_df - main_df
total_usage = pr_df['_slow_function_call'] + pr_df['_fast_function_call']
pr_df['CPU Usage'] = ((pr_df['_slow_function_call']/total_usage)*100.0).round(1)
pr_df['GPU Usage'] = ((pr_df['_fast_function_call']/total_usage)*100.0).round(1)
total_usage = main_df["_slow_function_call"] + main_df["_fast_function_call"]
main_df["CPU Usage"] = ((main_df["_slow_function_call"] / total_usage) * 100.0).round(1)
main_df["GPU Usage"] = ((main_df["_fast_function_call"] / total_usage) * 100.0).round(1)

total_usage = pr_df["_slow_function_call"] + pr_df["_fast_function_call"]
pr_df["CPU Usage"] = ((pr_df["_slow_function_call"] / total_usage) * 100.0).round(1)
pr_df["GPU Usage"] = ((pr_df["_fast_function_call"] / total_usage) * 100.0).round(1)

cpu_usage_mean = pr_df["CPU Usage"].mean().round(2)
gpu_usage_mean = pr_df["GPU Usage"].mean().round(2)

gpu_usage_rate_change = abs(pr_df["GPU Usage"].mean() - main_df["GPU Usage"].mean())
pr_df["CPU Usage"] = pr_df["CPU Usage"].fillna(0)
pr_df["GPU Usage"] = pr_df["GPU Usage"].fillna(0)
main_df["CPU Usage"] = main_df["CPU Usage"].fillna(0)
main_df["GPU Usage"] = main_df["GPU Usage"].fillna(0)

cpu_usage_mean = pr_df['CPU Usage'].mean().round(2)
gpu_usage_mean = pr_df['GPU Usage'].mean().round(2)
diff_df = pr_df - main_df
diff_df["CPU Usage"] = diff_df["CPU Usage"].round(1).fillna(0)
diff_df["GPU Usage"] = diff_df["GPU Usage"].round(1).fillna(0)

# Add '%' suffix to 'CPU Usage' and 'GPU Usage' columns
pr_df['CPU Usage'] = pr_df['CPU Usage'].fillna(0).astype(str) + '%'
pr_df['GPU Usage'] = pr_df['GPU Usage'].fillna(0).astype(str) + '%'
# Add '%' suffix to "CPU Usage" and "GPU Usage" columns
pr_df["CPU Usage"] = pr_df["CPU Usage"].astype(str) + "%"
pr_df["GPU Usage"] = pr_df["GPU Usage"].astype(str) + "%"

pr_df = pr_df[["total", "passed", "failed", "skipped", 'CPU Usage', 'GPU Usage']]
diff_df = diff_df[["total", "passed", "failed", "skipped"]]
pr_df = pr_df[["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]]
diff_df = diff_df[["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]]
diff_df.columns = diff_df.columns + "_diff"
diff_df["passed_diff"] = diff_df["passed_diff"].map(emoji_passed)
diff_df["failed_diff"] = diff_df["failed_diff"].map(emoji_failed)
Expand All @@ -99,13 +112,36 @@ def emoji_failed(x):
"passed_diff": "Passed delta",
"failed_diff": "Failed delta",
"skipped_diff": "Skipped delta",
"CPU Usage_diff": "CPU Usage delta",
"GPU Usage_diff": "GPU Usage delta",
}
)
df = df.sort_values(by=["Failed tests", "Skipped tests"], ascending=False)

df = df.sort_values(by=["CPU Usage delta", "Total tests"], ascending=False)
df["CPU Usage delta"] = df["CPU Usage delta"].map(emoji_failed)
df["GPU Usage delta"] = df["GPU Usage delta"].map(emoji_passed)
df = df[
[
"Total tests",
"CPU Usage delta",
"GPU Usage delta",
"Passed tests",
"Failed tests",
"Skipped tests",
"CPU Usage",
"GPU Usage",
"Total delta",
"Passed delta",
"Failed delta",
"Skipped delta",
]
]
print(comment)
print()
print(f"Average CPU and GPU usage for the tests: {cpu_usage_mean}% and {gpu_usage_mean}%")
print(
f"Average GPU usage: {gpu_usage_mean}% {'an increase' if gpu_usage_rate_change > 0 else 'a decrease'} by {gpu_usage_rate_change}%"
)
print()
print(f"Average CPU usage: {cpu_usage_mean}%")
print()
print("Here are the results of running the Pandas tests against this PR:")
print()
Expand Down
1 change: 1 addition & 0 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ for FILE in .github/workflows/*.yaml .github/workflows/*.yml; do
sed_runner "s/dask-cuda.git@branch-[^\"\s]\+/dask-cuda.git@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
done
sed_runner "s/branch-[0-9]\+\.[0-9]\+/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_cudf_polars.sh
sed_runner "s/branch-[0-9]\+\.[0-9]\+/branch-${NEXT_SHORT_TAG}/g" ci/test_cudf_polars_polars_tests.sh

# Java files
NEXT_FULL_JAVA_TAG="${NEXT_SHORT_TAG}.${PATCH_PEP440}-SNAPSHOT"
Expand Down
2 changes: 1 addition & 1 deletion ci/run_cudf_polars_polars_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ python -m pytest \
-m "" \
-p cudf_polars.testing.plugin \
-v \
--tb=short \
--tb=native \
${DESELECTED_TESTS} \
"$@" \
py-polars/tests
5 changes: 2 additions & 3 deletions ci/test_cudf_polars_polars_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ set -eou pipefail
# files in cudf_polars/pylibcudf", rather than "are there changes
# between upstream and this branch which touch cudf_polars/pylibcudf"
# TODO: is the target branch exposed anywhere in an environment variable?
if [ -n "$(git diff --name-only origin/branch-24.10...HEAD -- python/cudf_polars/ python/cudf/cudf/_lib/pylibcudf/)" ];
if [ -n "$(git diff --name-only origin/branch-24.12...HEAD -- python/cudf_polars/ python/cudf/cudf/_lib/pylibcudf/)" ];
then
HAS_CHANGES=1
rapids-logger "PR has changes in cudf-polars/pylibcudf, test fails treated as failure"
Expand All @@ -33,8 +33,7 @@ python -m pip install ./local-pylibcudf-dep/pylibcudf*.whl
rapids-logger "Install cudf_polars"
python -m pip install $(echo ./dist/cudf_polars*.whl)

# TAG=$(python -c 'import polars; print(f"py-{polars.__version__}")')
TAG="py-1.7.0"
TAG=$(python -c 'import polars; print(f"py-{polars.__version__}")')
rapids-logger "Clone polars to ${TAG}"
git clone https://github.com/pola-rs/polars.git --branch ${TAG} --depth 1

Expand Down
5 changes: 1 addition & 4 deletions ci/test_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,14 @@ if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
| tee ./constraints.txt
fi

# echo to expand wildcard before adding `[extra]` requires for pip
# echo to expand wildcard before adding `[test]` requires for pip
python -m pip install \
-v \
--constraint ./constraints.txt \
"$(echo ./dist/cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
"$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)"

rapids-logger "Pin to 1.7.0 Temporarily"
python -m pip install polars==1.7.0

rapids-logger "Run cudf_polars tests"

function set_exitcode()
Expand Down
Loading

0 comments on commit c2139ef

Please sign in to comment.