Make tests deterministic (#16910)
This PR is a first pass at making tests deterministic; I noticed one of the CI jobs failed due to an overflow error related to random data generation.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Richard (Rick) Zamora (https://github.com/rjzamora)

URL: #16910
galipremsagar authored Sep 25, 2024
1 parent 987fea3 commit ba4afae
Showing 4 changed files with 22 additions and 16 deletions.
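The changes below all apply one pattern: test data that used to come from unseeded calls to NumPy's random module is now generated under an explicit seed, either by calling np.random.seed(0) before the data is built or by passing seed=0 to the data-generation helpers, so every run of a test sees identical inputs. A minimal sketch of the idea, using an illustrative helper name rather than anything from the cudf test suite:

    import numpy as np

    def make_test_array(n, seed=0):
        # A dedicated, seeded generator returns the same data on every run and
        # does not depend on global RNG state left behind by other tests.
        rng = np.random.default_rng(seed)
        return rng.random(n)

    assert (make_test_array(100) == make_test_array(100)).all()  # reproducible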
26 changes: 12 additions & 14 deletions python/cudf/cudf/tests/test_array_function.py
@@ -33,9 +33,10 @@ def __array_function__(self, *args, **kwargs):

missing_arrfunc_reason = "NEP-18 support is not available in NumPy"

np.random.seed(0)


@pytest.mark.skipif(missing_arrfunc_cond, reason=missing_arrfunc_reason)
@pytest.mark.parametrize("np_ar", [np.random.random(100)])
@pytest.mark.parametrize(
"func",
[
@@ -47,7 +48,8 @@ def __array_function__(self, *args, **kwargs):
lambda x: np.linalg.norm(x),
],
)
def test_array_func_cudf_series(np_ar, func):
def test_array_func_cudf_series(func):
np_ar = np.random.random(100)
cudf_ser = cudf.Series(np_ar)
expect = func(np_ar)
got = func(cudf_ser)
@@ -58,9 +60,6 @@ def test_array_func_cudf_series(np_ar, func):


@pytest.mark.skipif(missing_arrfunc_cond, reason=missing_arrfunc_reason)
@pytest.mark.parametrize(
"pd_df", [pd.DataFrame(np.random.uniform(size=(100, 10)))]
)
@pytest.mark.parametrize(
"func",
[
@@ -74,17 +73,15 @@ def test_array_func_cudf_series(np_ar, func):
lambda x: np.prod(x, axis=1),
],
)
def test_array_func_cudf_dataframe(pd_df, func):
def test_array_func_cudf_dataframe(func):
pd_df = pd.DataFrame(np.random.uniform(size=(100, 10)))
cudf_df = cudf.from_pandas(pd_df)
expect = func(pd_df)
got = func(cudf_df)
assert_eq(expect, got)


@pytest.mark.skipif(missing_arrfunc_cond, reason=missing_arrfunc_reason)
@pytest.mark.parametrize(
"pd_df", [pd.DataFrame(np.random.uniform(size=(100, 10)))]
)
@pytest.mark.parametrize(
"func",
[
@@ -93,21 +90,22 @@ def test_array_func_cudf_dataframe(pd_df, func):
lambda x: np.linalg.det(x),
],
)
def test_array_func_missing_cudf_dataframe(pd_df, func):
def test_array_func_missing_cudf_dataframe(func):
pd_df = pd.DataFrame(np.random.uniform(size=(100, 10)))
cudf_df = cudf.from_pandas(pd_df)
with pytest.raises(TypeError):
func(cudf_df)


@pytest.mark.skipif(missing_arrfunc_cond, reason=missing_arrfunc_reason)
@pytest.mark.parametrize("np_ar", [np.random.random(100)])
@pytest.mark.parametrize(
"func",
[
lambda x: np.unique(x),
],
)
def test_array_func_cudf_index(np_ar, func):
def test_array_func_cudf_index(func):
np_ar = np.random.random(100)
cudf_index = cudf.Index(cudf.Series(np_ar))
expect = func(np_ar)
got = func(cudf_index)
@@ -118,7 +116,6 @@ def test_array_func_cudf_index(np_ar, func):


@pytest.mark.skipif(missing_arrfunc_cond, reason=missing_arrfunc_reason)
@pytest.mark.parametrize("np_ar", [np.random.random(100)])
@pytest.mark.parametrize(
"func",
[
@@ -127,7 +124,8 @@ def test_array_func_cudf_index(np_ar, func):
lambda x: np.linalg.det(x),
],
)
def test_array_func_missing_cudf_index(np_ar, func):
def test_array_func_missing_cudf_index(func):
np_ar = np.random.random(100)
cudf_index = cudf.Index(cudf.Series(np_ar))
with pytest.raises(TypeError):
func(cudf_index)
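In the original tests above, np.random.random(100) was evaluated inside @pytest.mark.parametrize, i.e. at import/collection time with no seed reliably in effect; the change seeds the module and moves the generation into each test body. A seeded fixture is another way to get the same effect (hypothetical names, shown only as a sketch, not what this PR does):

    import numpy as np
    import pytest

    @pytest.fixture
    def np_ar():
        # Built lazily at test run time from its own seeded generator, so the
        # array is identical across runs and across test orderings.
        return np.random.default_rng(0).random(100)

    def test_series_roundtrip(np_ar):
        assert np_ar.shape == (100,)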
3 changes: 2 additions & 1 deletion python/cudf/cudf/tests/test_avro.py
@@ -236,6 +236,7 @@ def test_avro_compression(rows, codec):
},
],
rows,
seed=0,
)
expected_df = cudf.DataFrame.from_arrow(df)

@@ -599,7 +600,7 @@ def test_avro_reader_multiblock(
else:
assert dtype in ("float32", "float64")
avro_type = "float" if dtype == "float32" else "double"

np.random.seed(0)
# We don't use rand_dataframe() here, because it increases the
# execution time of each test by a factor of 10 or more (it appears
# to use a very costly approach to generating random data).
7 changes: 7 additions & 0 deletions python/cudf/cudf/tests/test_groupby.py
@@ -2470,6 +2470,7 @@ def test_groupby_2keys_rank(nelem, method, ascending, na_option, pct):
],
rows=nelem,
use_threads=False,
seed=0,
)
pdf = t.to_pandas()
pdf.columns = ["x", "y", "z"]
@@ -2602,6 +2603,7 @@ def test_groupby_shift_row_mixed_numerics(
],
rows=nelem,
use_threads=False,
seed=0,
)
pdf = t.to_pandas()
gdf = cudf.from_pandas(pdf)
@@ -2639,6 +2641,7 @@ def test_groupby_shift_row_mixed(nelem, shift_perc, direction):
],
rows=nelem,
use_threads=False,
seed=0,
)
pdf = t.to_pandas()
gdf = cudf.from_pandas(pdf)
@@ -2687,6 +2690,7 @@ def test_groupby_shift_row_mixed_fill(
],
rows=nelem,
use_threads=False,
seed=0,
)
pdf = t.to_pandas()
gdf = cudf.from_pandas(pdf)
@@ -2732,6 +2736,7 @@ def test_groupby_shift_row_zero_shift(nelem, fill_value):
],
rows=nelem,
use_threads=False,
seed=0,
)
gdf = cudf.from_pandas(t.to_pandas())

@@ -2782,6 +2787,7 @@ def test_groupby_diff_row_mixed_numerics(nelem, shift_perc, direction):
],
rows=nelem,
use_threads=False,
seed=0,
)
pdf = t.to_pandas()
gdf = cudf.from_pandas(pdf)
@@ -2815,6 +2821,7 @@ def test_groupby_diff_row_zero_shift(nelem):
],
rows=nelem,
use_threads=False,
seed=0,
)
gdf = cudf.from_pandas(t.to_pandas())

2 changes: 1 addition & 1 deletion python/dask_cudf/dask_cudf/tests/test_reductions.py
@@ -13,6 +13,7 @@


def _make_random_frame(nelem, npartitions=2):
np.random.seed(0)
df = pd.DataFrame(
{
"x": np.random.randint(0, 5, size=nelem),
@@ -38,7 +39,6 @@ def wrapped(series):
@pytest.mark.parametrize("reducer", _reducers)
def test_series_reduce(reducer):
reducer = _get_reduce_fn(reducer)
np.random.seed(0)
size = 10
df, gdf = _make_random_frame(size)

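Here the seed moves from the individual test into _make_random_frame itself, so every test that requests a frame through the helper gets reproducible data rather than relying on each caller to seed first. A rough illustration of that placement (a simplified stand-in, not the dask_cudf helper):

    import numpy as np

    def make_data(nelem):
        # Seeding inside the helper, as this diff does, pins the data for every
        # caller; note that np.random.seed sets global state, so a local
        # np.random.default_rng(0) is an alternative that avoids touching it.
        np.random.seed(0)
        return np.random.randint(0, 5, size=nelem)

    assert (make_data(10) == make_data(10)).all()  # same data on every call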
