Skip to content

Commit

Permalink
Address dask_cudf.read_csv chunksize deprecation (#4379)
Browse files Browse the repository at this point in the history
xref #4271

The `chunksize` argument of `dask_cudf.read_csv` was deprecated in favor of `blocksize`.

Also removed an unsupported `chunksize` argument from a `cudf.read_csv` call.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Don Acosta (https://github.com/acostadon)
  - Rick Ratzel (https://github.com/rlratzel)

URL: #4379
  • Loading branch information
mroeschke authored May 8, 2024
1 parent 9e3f745 commit 78227b3
Show file tree
Hide file tree
Showing 38 changed files with 84 additions and 86 deletions.
4 changes: 2 additions & 2 deletions benchmarks/cugraph/standalone/cugraph_dask_funcs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -44,7 +44,7 @@ def read_csv(input_csv_file, scale):
chunksize = cugraph.dask.get_chunksize(input_csv_file)
return dask_cudf.read_csv(
input_csv_file,
chunksize=chunksize,
blocksize=chunksize,
delimiter=" ",
# names=names,
dtype=dtypes,
Expand Down
4 changes: 1 addition & 3 deletions benchmarks/cugraph/standalone/cugraph_funcs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -75,10 +75,8 @@ def read_csv(input_csv_file, scale):
dtypes = [vertex_t, vertex_t, "float32"]
names = (["src", "dst", "weight"],)

chunksize = cugraph.dask.get_chunksize(input_csv_file)
return cudf.read_csv(
input_csv_file,
chunksize=chunksize,
delimiter=" ",
# names=names,
dtype=dtypes,
Expand Down
2 changes: 1 addition & 1 deletion docs/cugraph/source/api_docs/cugraph/dask-cugraph.rst
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ Example
# multi-GPU CSV reader
e_list = dask_cudf.read_csv(
input_data_path,
chunksize=chunksize,
blocksize=chunksize,
names=['src', 'dst'],
dtype=['int32', 'int32'],
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -204,7 +204,7 @@ def betweenness_centrality(
>>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/..
>>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv")
>>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv",
... chunksize=chunksize, delimiter=" ",
... blocksize=chunksize, delimiter=" ",
... names=["src", "dst", "value"],
... dtype=["int32", "int32", "float32"])
>>> dg = cugraph.Graph(directed=True)
Expand Down Expand Up @@ -362,7 +362,7 @@ def edge_betweenness_centrality(
>>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/..
>>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv")
>>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv",
... chunksize=chunksize, delimiter=" ",
... blocksize=chunksize, delimiter=" ",
... names=["src", "dst", "value"],
... dtype=["int32", "int32", "float32"])
>>> dg = cugraph.Graph(directed=True)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -104,7 +104,7 @@ def eigenvector_centrality(input_graph, max_iter=100, tol=1.0e-6):
>>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/..
>>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv")
>>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv",
... chunksize=chunksize, delimiter=" ",
... blocksize=chunksize, delimiter=" ",
... names=["src", "dst", "value"],
... dtype=["int32", "int32", "float32"])
>>> dg = cugraph.Graph()
Expand Down
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/dask/centrality/katz_centrality.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -133,7 +133,7 @@ def katz_centrality(
>>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/..
>>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv")
>>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv",
... chunksize=chunksize, delimiter=" ",
... blocksize=chunksize, delimiter=" ",
... names=["src", "dst", "value"],
... dtype=["int32", "int32", "float32"])
>>> dg = cugraph.Graph(directed=True)
Expand Down
2 changes: 1 addition & 1 deletion python/cugraph/cugraph/dask/community/leiden.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def leiden(
>>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/..
>>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv")
>>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv",
... chunksize=chunksize, delimiter=" ",
... blocksize=chunksize, delimiter=" ",
... names=["src", "dst", "value"],
... dtype=["int32", "int32", "float32"])
>>> dg = cugraph.Graph()
Expand Down
2 changes: 1 addition & 1 deletion python/cugraph/cugraph/dask/community/louvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def louvain(
>>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/..
>>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv")
>>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv",
... chunksize=chunksize, delimiter=" ",
... blocksize=chunksize, delimiter=" ",
... names=["src", "dst", "value"],
... dtype=["int32", "int32", "float32"])
>>> dg = cugraph.Graph()
Expand Down
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/dask/components/connectivity.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -78,7 +78,7 @@ def weakly_connected_components(input_graph):
>>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/..
>>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv")
>>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv",
... chunksize=chunksize, delimiter=" ",
... blocksize=chunksize, delimiter=" ",
... names=["src", "dst", "value"],
... dtype=["int32", "int32", "float32"])
>>> dg = cugraph.Graph(directed=False)
Expand Down
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/dask/cores/k_core.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -109,7 +109,7 @@ def k_core(input_graph, k=None, core_number=None, degree_type="bidirectional"):
>>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/..
>>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv")
>>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv",
... chunksize=chunksize, delimiter=" ",
... blocksize=chunksize, delimiter=" ",
... names=["src", "dst", "value"],
... dtype=["int32", "int32", "float32"])
>>> dg = cugraph.Graph(directed=False)
Expand Down
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/dask/link_analysis/hits.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -118,7 +118,7 @@ def hits(input_graph, tol=1.0e-5, max_iter=100, nstart=None, normalized=True):
>>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/..
>>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv")
>>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv",
... chunksize=chunksize, delimiter=" ",
... blocksize=chunksize, delimiter=" ",
... names=["src", "dst", "value"],
... dtype=["int32", "int32", "float32"])
>>> dg = cugraph.Graph(directed=True)
Expand Down
2 changes: 1 addition & 1 deletion python/cugraph/cugraph/dask/link_analysis/pagerank.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ def pagerank(
>>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/..
>>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv")
>>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv",
... chunksize=chunksize, delimiter=" ",
... blocksize=chunksize, delimiter=" ",
... names=["src", "dst", "value"],
... dtype=["int32", "int32", "float32"])
>>> dg = cugraph.Graph(directed=True)
Expand Down
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/dask/traversal/bfs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -106,7 +106,7 @@ def bfs(input_graph, start, depth_limit=None, return_distances=True, check_start
>>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/..
>>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv")
>>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv",
... chunksize=chunksize, delimiter=" ",
... blocksize=chunksize, delimiter=" ",
... names=["src", "dst", "value"],
... dtype=["int32", "int32", "float32"])
>>> dg = cugraph.Graph(directed=True)
Expand Down
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/dask/traversal/sssp.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -89,7 +89,7 @@ def sssp(input_graph, source, cutoff=None, check_source=True):
>>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/..
>>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv")
>>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv",
... chunksize=chunksize, delimiter=" ",
... blocksize=chunksize, delimiter=" ",
... names=["src", "dst", "value"],
... dtype=["int32", "int32", "float32"])
>>> dg = cugraph.Graph(directed=True)
Expand Down
2 changes: 1 addition & 1 deletion python/cugraph/cugraph/structure/symmetrize.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def symmetrize_ddf(
>>> # Init a DASK Cluster
>>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/..
>>> # chunksize = dcg.get_chunksize(datasets / 'karate.csv')
>>> # ddf = dask_cudf.read_csv(datasets/'karate.csv', chunksize=chunksize,
>>> # ddf = dask_cudf.read_csv(datasets/'karate.csv', blocksize=chunksize,
>>> # delimiter=' ',
>>> # names=['src', 'dst', 'weight'],
>>> # dtype=['int32', 'int32', 'float32'])
Expand Down
6 changes: 3 additions & 3 deletions python/cugraph/cugraph/testing/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -245,7 +245,7 @@ def read_dask_cudf_csv_file(csv_file, read_weights_in_sp=True, single_partition=
chunksize = os.path.getsize(csv_file)
return dask_cudf.read_csv(
csv_file,
chunksize=chunksize,
blocksize=chunksize,
delimiter=" ",
names=["src", "dst", "weight"],
dtype=["int32", "int32", "float32"],
Expand All @@ -264,7 +264,7 @@ def read_dask_cudf_csv_file(csv_file, read_weights_in_sp=True, single_partition=
chunksize = os.path.getsize(csv_file)
return dask_cudf.read_csv(
csv_file,
chunksize=chunksize,
blocksize=chunksize,
delimiter=" ",
names=["src", "dst", "weight"],
dtype=["int32", "int32", "float32"],
Expand Down
6 changes: 3 additions & 3 deletions python/cugraph/cugraph/tests/comms/test_comms_mg.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -53,7 +53,7 @@ def test_dask_mg_pagerank(dask_client, directed):

ddf1 = dask_cudf.read_csv(
input_data_path1,
chunksize=chunksize1,
blocksize=chunksize1,
delimiter=" ",
names=["src", "dst", "value"],
dtype=["int32", "int32", "float32"],
Expand All @@ -66,7 +66,7 @@ def test_dask_mg_pagerank(dask_client, directed):

ddf2 = dask_cudf.read_csv(
input_data_path2,
chunksize=chunksize2,
blocksize=chunksize2,
delimiter=" ",
names=["src", "dst", "value"],
dtype=["int32", "int32", "float32"],
Expand Down
6 changes: 3 additions & 3 deletions python/cugraph/cugraph/tests/community/test_leiden_mg.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -67,7 +67,7 @@ def daskGraphFromDataset(request, dask_client):
chunksize = dcg.get_chunksize(dataset)
ddf = dask_cudf.read_csv(
dataset,
chunksize=chunksize,
blocksize=chunksize,
delimiter=" ",
names=["src", "dst", "value"],
dtype=["int32", "int32", "float32"],
Expand Down Expand Up @@ -96,7 +96,7 @@ def uddaskGraphFromDataset(request, dask_client):
chunksize = dcg.get_chunksize(dataset)
ddf = dask_cudf.read_csv(
dataset,
chunksize=chunksize,
blocksize=chunksize,
delimiter=" ",
names=["src", "dst", "value"],
dtype=["int32", "int32", "float32"],
Expand Down
6 changes: 3 additions & 3 deletions python/cugraph/cugraph/tests/community/test_louvain_mg.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -67,7 +67,7 @@ def daskGraphFromDataset(request, dask_client):
chunksize = dcg.get_chunksize(dataset)
ddf = dask_cudf.read_csv(
dataset,
chunksize=chunksize,
blocksize=chunksize,
delimiter=" ",
names=["src", "dst", "value"],
dtype=["int32", "int32", "float32"],
Expand Down Expand Up @@ -96,7 +96,7 @@ def uddaskGraphFromDataset(request, dask_client):
chunksize = dcg.get_chunksize(dataset)
ddf = dask_cudf.read_csv(
dataset,
chunksize=chunksize,
blocksize=chunksize,
delimiter=" ",
names=["src", "dst", "value"],
dtype=["int32", "int32", "float32"],
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -88,7 +88,7 @@ def input_expected_output(dask_client, input_combo):
chunksize = dcg.get_chunksize(input_data_path)
ddf = dask_cudf.read_csv(
input_data_path,
chunksize=chunksize,
blocksize=chunksize,
delimiter=" ",
names=["src", "dst", "value"],
dtype=["int32", "int32", "float32"],
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -48,7 +48,7 @@ def test_dask_mg_wcc(dask_client, directed):

ddf = dask_cudf.read_csv(
input_data_path,
chunksize=chunksize,
blocksize=chunksize,
delimiter=" ",
names=["src", "dst", "value"],
dtype=["int32", "int32", "float32"],
Expand Down
6 changes: 3 additions & 3 deletions python/cugraph/cugraph/tests/core/test_core_number_mg.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -78,7 +78,7 @@ def input_expected_output(dask_client, input_combo):
chunksize = dcg.get_chunksize(input_data_path)
ddf = dask_cudf.read_csv(
input_data_path,
chunksize=chunksize,
blocksize=chunksize,
delimiter=" ",
names=["src", "dst", "value"],
dtype=["int32", "int32", "float32"],
Expand Down Expand Up @@ -143,7 +143,7 @@ def test_core_number_invalid_input(input_expected_output):
chunksize = dcg.get_chunksize(input_data_path)
ddf = dask_cudf.read_csv(
input_data_path,
chunksize=chunksize,
blocksize=chunksize,
delimiter=" ",
names=["src", "dst", "value"],
dtype=["int32", "int32", "float32"],
Expand Down
6 changes: 3 additions & 3 deletions python/cugraph/cugraph/tests/core/test_k_core_mg.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -98,7 +98,7 @@ def input_expected_output(dask_client, input_combo):
chunksize = dcg.get_chunksize(input_data_path)
ddf = dask_cudf.read_csv(
input_data_path,
chunksize=chunksize,
blocksize=chunksize,
delimiter=" ",
names=["src", "dst", "value"],
dtype=["int32", "int32", "float32"],
Expand Down Expand Up @@ -164,7 +164,7 @@ def test_dask_mg_k_core_invalid_input(dask_client):
chunksize = dcg.get_chunksize(input_data_path)
ddf = dask_cudf.read_csv(
input_data_path,
chunksize=chunksize,
blocksize=chunksize,
delimiter=" ",
names=["src", "dst", "value"],
dtype=["int32", "int32", "float32"],
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -372,7 +372,7 @@ def net_MGPropertyGraph(dask_client):
chunksize = dcg.get_chunksize(input_data_path)
ddf = dask_cudf.read_csv(
input_data_path,
chunksize=chunksize,
blocksize=chunksize,
delimiter=" ",
names=["src", "dst", "value"],
dtype=["int32", "int32", "float32"],
Expand Down
Loading

0 comments on commit 78227b3

Please sign in to comment.