Skip to content

Commit

Permalink
Merge branch 'branch-24.06' into use-new-runners
Browse files Browse the repository at this point in the history
  • Loading branch information
ajschmidt8 authored Apr 10, 2024
2 parents e5866ca + 460b41e commit e18679b
Show file tree
Hide file tree
Showing 32 changed files with 487 additions and 161 deletions.
13 changes: 9 additions & 4 deletions .github/workflows/status.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,18 @@ jobs:
state: CUSTOM_STATE = 'success'
} = contentJSON;
// Fetch the first job ID from the workflow run
const jobs = await github.rest.actions.listJobsForWorkflowRun({
// Fetch all jobs using pagination
const jobs = await github.paginate(
github.rest.actions.listJobsForWorkflowRun,
{
owner: context.repo.owner,
repo: context.repo.repo,
run_id: process.env.WORKFLOW_RUN_ID,
});
const job = jobs.data.jobs.find(job => job.name === JOB_NAME);
}
);
// Look up the job named JOB_NAME among the fetched jobs to get its ID
const job = jobs.find(job => job.name === JOB_NAME);
const JOB_ID = job ? job.id : null;
// Set default target URL if not defined
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ jobs:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@use-new-runners
with:
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(min_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: nightly
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
Expand Down
297 changes: 297 additions & 0 deletions CHANGELOG.md

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions ci/cudf_pandas_scripts/pandas-tests/diff.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@
GH_JOB_NAME="pandas-tests-diff / build"
rapids-logger "Github job name: ${GH_JOB_NAME}"

MAIN_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py310.main-results.json
PR_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py39.pr-results.json
PY_VER="39"
MAIN_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py${PY_VER}.main-results.json
PR_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py${PY_VER}.pr-results.json

rapids-logger "Fetching latest available results from nightly"
aws s3api list-objects-v2 --bucket rapids-downloads --prefix "nightly/" --query "sort_by(Contents[?ends_with(Key, '.main-results.json')], &LastModified)[::-1].[Key]" --output text > s3_output.txt
cat s3_output.txt
aws s3api list-objects-v2 --bucket rapids-downloads --prefix "nightly/" --query "sort_by(Contents[?ends_with(Key, '_py${PY_VER}.main-results.json')], &LastModified)[::-1].[Key]" --output text > s3_output.txt

read -r COMPARE_ENV < s3_output.txt
export COMPARE_ENV
rapids-logger "Latest available results from nightly: ${COMPARE_ENV}"
Expand Down
4 changes: 3 additions & 1 deletion cpp/examples/build.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
#!/bin/bash

# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2024, NVIDIA CORPORATION.

# libcudf examples build script

set -euo pipefail

# Parallelism control
PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}

Expand Down
4 changes: 3 additions & 1 deletion cpp/examples/strings/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <cudf/column/column_view.hpp>
#include <cudf/io/csv.hpp>
#include <cudf/io/datasource.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>

Expand Down Expand Up @@ -110,7 +111,8 @@ int main(int argc, char const** argv)

std::chrono::duration<double> elapsed = std::chrono::steady_clock::now() - st;
std::cout << "Wall time: " << elapsed.count() << " seconds\n";
std::cout << "Output size " << result->view().child(1).size() << " bytes\n";
auto const scv = cudf::strings_column_view(result->view());
std::cout << "Output size " << scv.chars_size(rmm::cuda_stream_default) << " bytes\n";

return 0;
}
8 changes: 6 additions & 2 deletions cpp/examples/strings/custom_optimized.cu
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,12 @@ std::unique_ptr<cudf::column> redact_strings(cudf::column_view const& names,
redact_kernel<<<blocks, block_size, 0, stream.value()>>>(
*d_names, *d_visibilities, offsets.data(), chars.data());

// create column from offsets and chars vectors (no copy is performed)
auto result = cudf::make_strings_column(names.size(), std::move(offsets), chars.release(), {}, 0);
// create column from offsets vector (move only)
auto offsets_column = std::make_unique<cudf::column>(std::move(offsets), rmm::device_buffer{}, 0);

// create strings column from the offsets column and chars buffer (no copy is performed)
auto result = cudf::make_strings_column(
names.size(), std::move(offsets_column), chars.release(), 0, rmm::device_buffer{});

// wait for all of the above to finish
stream.synchronize();
Expand Down
1 change: 1 addition & 0 deletions cpp/src/io/parquet/page_enc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1896,6 +1896,7 @@ CUDF_KERNEL void __launch_bounds__(block_size, 8)
s->rle_out = dst + RLE_LENGTH_FIELD_LEN;
s->rle_len_pos = dst;
}
s->cur = s->rle_out;
s->page_start_val = row_to_value_idx(s->page.start_row, s->col);
s->chunk_start_val = row_to_value_idx(s->ck.start_row, s->col);
}
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/_fuzz_testing/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def set_rand_params(self, params):
if dtype_val is not None:
dtype_val = {
col_name: "category"
if cudf.utils.dtypes._is_categorical_dtype(dtype)
if isinstance(dtype, cudf.CategoricalDtype)
else pandas_dtypes_to_np_dtypes[dtype]
for col_name, dtype in dtype_val.items()
}
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/_fuzz_testing/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def _get_dtype_param_value(dtype_val):
if dtype_val is not None and isinstance(dtype_val, abc.Mapping):
processed_dtypes = {}
for col_name, dtype in dtype_val.items():
if cudf.utils.dtypes._is_categorical_dtype(dtype):
if isinstance(dtype, cudf.CategoricalDtype):
processed_dtypes[col_name] = "category"
else:
processed_dtypes[col_name] = str(
Expand Down
15 changes: 7 additions & 8 deletions python/cudf/cudf/_lib/csv.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -434,19 +434,19 @@ def read_csv(
if dtype is not None:
if isinstance(dtype, abc.Mapping):
for k, v in dtype.items():
if cudf.api.types._is_categorical_dtype(v):
if isinstance(cudf.dtype(v), cudf.CategoricalDtype):
df._data[str(k)] = df._data[str(k)].astype(v)
elif (
cudf.api.types.is_scalar(dtype) or
isinstance(dtype, (
np.dtype, pd.api.extensions.ExtensionDtype, type
))
):
if cudf.api.types._is_categorical_dtype(dtype):
if isinstance(cudf.dtype(dtype), cudf.CategoricalDtype):
df = df.astype(dtype)
elif isinstance(dtype, abc.Collection):
for index, col_dtype in enumerate(dtype):
if cudf.api.types._is_categorical_dtype(col_dtype):
if isinstance(cudf.dtype(col_dtype), cudf.CategoricalDtype):
col_name = df._data.names[index]
df._data[col_name] = df._data[col_name].astype(col_dtype)

Expand Down Expand Up @@ -554,11 +554,10 @@ cdef data_type _get_cudf_data_type_from_dtype(object dtype) except *:
# TODO: Remove this work-around once Dictionary types
# in libcudf are fully mapped to categorical columns:
# https://github.com/rapidsai/cudf/issues/3960
if cudf.api.types._is_categorical_dtype(dtype):
if isinstance(dtype, str):
dtype = "str"
else:
dtype = dtype.categories.dtype
if isinstance(dtype, cudf.CategoricalDtype):
dtype = dtype.categories.dtype
elif dtype == "category":
dtype = "str"

if isinstance(dtype, str):
if str(dtype) == "date32":
Expand Down
9 changes: 8 additions & 1 deletion python/cudf/cudf/_lib/pylibcudf/types.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,14 @@ cdef class DataType:
The scale associated with the data. Only used for decimal data types.
"""
def __cinit__(self, type_id id, int32_t scale=0):
self.c_obj = data_type(id, scale)
if (
id == type_id.DECIMAL32
or id == type_id.DECIMAL64
or id == type_id.DECIMAL128
):
self.c_obj = data_type(id, scale)
else:
self.c_obj = data_type(id)

# TODO: Consider making both id and scale cached properties.
cpdef type_id id(self):
Expand Down
53 changes: 18 additions & 35 deletions python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def categories(self) -> "cudf.core.index.Index":
"""
The categories of this categorical.
"""
return cudf.core.index.as_index(self._column.categories)
return self._column.dtype.categories

@property
def codes(self) -> "cudf.Series":
Expand Down Expand Up @@ -165,7 +165,7 @@ def as_ordered(self) -> Optional[SeriesOrIndex]:
dtype: category
Categories (3, int64): [1 < 2 < 10]
"""
return self._return_or_inplace(self._column.as_ordered())
return self._return_or_inplace(self._column.as_ordered(ordered=True))

def as_unordered(self) -> Optional[SeriesOrIndex]:
"""
Expand Down Expand Up @@ -212,8 +212,7 @@ def as_unordered(self) -> Optional[SeriesOrIndex]:
dtype: category
Categories (3, int64): [1, 2, 10]
"""

return self._return_or_inplace(self._column.as_unordered())
return self._return_or_inplace(self._column.as_ordered(ordered=False))

def add_categories(self, new_categories: Any) -> Optional[SeriesOrIndex]:
"""
Expand Down Expand Up @@ -631,10 +630,6 @@ def codes(self) -> NumericalColumn:
def ordered(self) -> bool:
return self.dtype.ordered

@ordered.setter
def ordered(self, value: bool):
self.dtype.ordered = value

def __setitem__(self, key, value):
if cudf.api.types.is_scalar(
value
Expand Down Expand Up @@ -1170,9 +1165,11 @@ def _get_decategorized_column(self) -> ColumnBase:
def copy(self, deep: bool = True) -> Self:
result_col = super().copy(deep=deep)
if deep:
result_col.categories = libcudf.copying.copy_column(
self.dtype._categories
dtype_copy = CategoricalDtype(
categories=self.categories.copy(),
ordered=self.ordered,
)
result_col = cast(Self, result_col._with_type_metadata(dtype_copy))
return result_col

@cached_property
Expand Down Expand Up @@ -1411,31 +1408,17 @@ def reorder_categories(
)
return self._set_categories(new_categories, ordered=ordered)

def as_ordered(self):
out_col = self
if not out_col.ordered:
out_col = column.build_categorical_column(
categories=self.categories,
codes=self.codes,
mask=self.base_mask,
size=self.base_size,
offset=self.offset,
ordered=True,
)
return out_col

def as_unordered(self):
out_col = self
if out_col.ordered:
out_col = column.build_categorical_column(
categories=self.categories,
codes=self.codes,
mask=self.base_mask,
size=self.base_size,
offset=self.offset,
ordered=False,
)
return out_col
def as_ordered(self, ordered: bool):
if self.dtype.ordered == ordered:
return self
return column.build_categorical_column(
categories=self.categories,
codes=self.codes,
mask=self.base_mask,
size=self.base_size,
offset=self.offset,
ordered=ordered,
)


def _create_empty_categorical_column(
Expand Down
7 changes: 4 additions & 3 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@
from cudf._lib.types import size_type_dtype
from cudf._typing import ColumnLike, Dtype, ScalarLike
from cudf.api.types import (
_is_categorical_dtype,
_is_non_decimal_numeric_dtype,
_is_pandas_nullable_extension_dtype,
infer_dtype,
Expand Down Expand Up @@ -1381,7 +1380,7 @@ def column_empty_like(

if (
hasattr(column, "dtype")
and _is_categorical_dtype(column.dtype)
and isinstance(column.dtype, cudf.CategoricalDtype)
and dtype == column.dtype
):
catcolumn = cast("cudf.core.column.CategoricalColumn", column)
Expand Down Expand Up @@ -2008,7 +2007,9 @@ def as_column(
length = 1
elif length < 0:
raise ValueError(f"{length=} must be >=0.")
if isinstance(arbitrary, pd.Interval) or _is_categorical_dtype(dtype):
if isinstance(
arbitrary, pd.Interval
) or cudf.api.types._is_categorical_dtype(dtype):
# No cudf.Scalar support yet
return as_column(
pd.Series([arbitrary] * length),
Expand Down
14 changes: 9 additions & 5 deletions python/cudf/cudf/core/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ def dtype(arbitrary):
raise TypeError(f"Unsupported type {np_dtype}")
return np_dtype

if isinstance(arbitrary, str) and arbitrary in {"hex", "hex32", "hex64"}:
# read_csv only accepts "hex"
# e.g. test_csv_reader_hexadecimals, test_csv_reader_hexadecimal_overflow
return arbitrary

# use `pandas_dtype` to try and interpret
# `arbitrary` as a Pandas extension type.
# Return the corresponding NumPy/cuDF type.
Expand Down Expand Up @@ -205,10 +210,6 @@ def ordered(self) -> bool:
"""
return self._ordered

@ordered.setter
def ordered(self, value) -> None:
self._ordered = value

@classmethod
def from_pandas(cls, dtype: pd.CategoricalDtype) -> "CategoricalDtype":
"""
Expand Down Expand Up @@ -1003,7 +1004,10 @@ def _is_categorical_dtype(obj):
pd.Series,
),
):
return _is_categorical_dtype(obj.dtype)
try:
return isinstance(cudf.dtype(obj.dtype), cudf.CategoricalDtype)
except TypeError:
return False
if hasattr(obj, "type"):
if obj.type is pd.CategoricalDtype.type:
return True
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2624,9 +2624,9 @@ def __init__(
elif isinstance(dtype, (pd.CategoricalDtype, cudf.CategoricalDtype)):
data = data.set_categories(dtype.categories, ordered=ordered)
elif ordered is True and data.ordered is False:
data = data.as_ordered()
data = data.as_ordered(ordered=True)
elif ordered is False and data.ordered is True:
data = data.as_unordered()
data = data.as_ordered(ordered=False)
super().__init__(data, **kwargs)

@property # type: ignore
Expand All @@ -2643,7 +2643,7 @@ def categories(self):
"""
The categories of this categorical.
"""
return as_index(self._values.categories)
return self.dtype.categories

def _is_boolean(self):
return False
Expand Down
Loading

0 comments on commit e18679b

Please sign in to comment.