From 3116eed763acad5808bd355d66406a3a630947f6 Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Sun, 26 Nov 2023 21:24:19 -0600 Subject: [PATCH] Moves more MG graph ETL to libcugraph and re-enables MG tests in CI (#3941) This PR includes changes that move some of the MG graph ETL steps (such as computing the number of edges) to libcugraph to reduce the amount of dask overhead involved in graph creation. Those ETL steps were also responsible for various dask-related transient errors that caused us to temporarily disable MG testing in CI. These changes allow us to re-enable MG testing in CI, so this PR includes that update too. Authors: - Joseph Nke (https://github.com/jnke2016) - Chuck Hastings (https://github.com/ChuckHastings) - Naim (https://github.com/naimnv) - Vibhu Jawa (https://github.com/VibhuJawa) - Rick Ratzel (https://github.com/rlratzel) Approvers: - Jake Awe (https://github.com/AyodeAwe) - Chuck Hastings (https://github.com/ChuckHastings) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/3941 --- ci/test_python.sh | 2 +- ci/test_wheel.sh | 2 +- cpp/include/cugraph_c/graph.h | 3 +- .../cugraph/cugraph/dask/community/egonet.py | 14 +- .../cugraph/dask/community/induced_subgraph.py | 19 +- .../cugraph/cugraph/dask/community/leiden.py | 16 +- .../cugraph/cugraph/dask/community/louvain.py | 16 +- .../cugraph/dask/link_analysis/pagerank.py | 17 +- .../cugraph/dask/sampling/random_walks.py | 18 +- python/cugraph/cugraph/dask/traversal/bfs.py | 15 +- .../simpleDistributedGraph.py | 130 +++++---- .../cugraph/cugraph/structure/symmetrize.py | 6 +- python/cugraph/cugraph/testing/mg_utils.py | 2 + python/cugraph/cugraph/tests/conftest.py | 7 +- .../tests/link_analysis/test_hits_mg.py | 2 +- .../cugraph/tests/structure/test_graph_mg.py | 12 +- .../pylibcugraph/_cugraph_c/graph.pxd | 62 +++- .../analyze_clustering_edge_cut.pyx | 2 +- .../analyze_clustering_modularity.pyx | 2 +- 
.../analyze_clustering_ratio_cut.pyx | 2 +- .../pylibcugraph/balanced_cut_clustering.pyx | 2 +- python/pylibcugraph/pylibcugraph/ecg.pyx | 2 +- .../edge_betweenness_centrality.pyx | 2 +- python/pylibcugraph/pylibcugraph/egonet.pyx | 2 +- .../pylibcugraph/eigenvector_centrality.pyx | 4 +- python/pylibcugraph/pylibcugraph/graphs.pxd | 6 +- python/pylibcugraph/pylibcugraph/graphs.pyx | 267 ++++++++++++------ .../pylibcugraph/induced_subgraph.pyx | 2 +- .../pylibcugraph/k_truss_subgraph.pyx | 2 +- python/pylibcugraph/pylibcugraph/leiden.pyx | 2 +- python/pylibcugraph/pylibcugraph/louvain.pyx | 2 +- python/pylibcugraph/pylibcugraph/node2vec.pyx | 2 +- python/pylibcugraph/pylibcugraph/pagerank.pyx | 2 +- .../pylibcugraph/personalized_pagerank.pyx | 2 +- .../spectral_modularity_maximization.pyx | 2 +- python/pylibcugraph/pylibcugraph/sssp.pyx | 4 +- .../pylibcugraph/tests/conftest.py | 12 +- .../tests/test_eigenvector_centrality.py | 12 +- .../pylibcugraph/tests/test_graph_sg.py | 25 +- .../tests/test_katz_centrality.py | 12 +- .../pylibcugraph/tests/test_louvain.py | 20 +- .../pylibcugraph/tests/test_node2vec.py | 22 +- .../pylibcugraph/tests/test_pagerank.py | 2 +- .../pylibcugraph/tests/test_sssp.py | 2 +- .../pylibcugraph/tests/test_triangle_count.py | 22 +- .../tests/test_uniform_neighbor_sample.py | 30 +- .../pylibcugraph/tests/test_utils.py | 2 +- .../pylibcugraph/uniform_random_walks.pyx | 2 +- .../weakly_connected_components.pyx | 2 +- 49 files changed, 521 insertions(+), 298 deletions(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index 273d3c93482..d6e92e8d1a5 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -79,7 +79,7 @@ pytest \ --cov=cugraph \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cugraph-coverage.xml" \ --cov-report=term \ - -k "not _mg" \ + -k "not test_property_graph_mg" \ tests popd diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh index 28f59f0209e..428efd4ed21 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel.sh @@ -26,5 +26,5 @@ 
else DASK_DISTRIBUTED__SCHEDULER__WORKER_TTL="1000s" \ DASK_DISTRIBUTED__COMM__TIMEOUTS__CONNECT="1000s" \ DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT="1000s" \ - python -m pytest -k "not _mg" ./python/${package_name}/${python_package_name}/tests + python -m pytest ./python/${package_name}/${python_package_name}/tests fi diff --git a/cpp/include/cugraph_c/graph.h b/cpp/include/cugraph_c/graph.h index 88176a9c1b6..00fce0493a3 100644 --- a/cpp/include/cugraph_c/graph.h +++ b/cpp/include/cugraph_c/graph.h @@ -103,7 +103,8 @@ cugraph_error_code_t cugraph_sg_graph_create( * Note that setting this flag will arbitrarily select one instance of a multi edge to be the * edge that survives. If the edges have properties that should be honored (e.g. sum the weights, - * or take the maximum weight), the caller should do that on not rely on this flag. + * or take the maximum weight), the caller should remove specific edges themselves and not rely + * on this flag. * @param [in] do_expensive_check If true, do expensive checks to validate the input data * is consistent with software assumptions. If false bypass these checks. 
* @param [out] graph A pointer to the graph object diff --git a/python/cugraph/cugraph/dask/community/egonet.py b/python/cugraph/cugraph/dask/community/egonet.py index 06f5d5b9a79..e49d4777cef 100644 --- a/python/cugraph/cugraph/dask/community/egonet.py +++ b/python/cugraph/cugraph/dask/community/egonet.py @@ -18,7 +18,9 @@ import cugraph.dask.comms.comms as Comms import dask_cudf import cudf -from cugraph.dask.common.input_utils import get_distributed_data +from cugraph.dask.common.part_utils import ( + persist_dask_df_equal_parts_per_worker, +) from pylibcugraph import ResourceHandle, ego_graph as pylibcugraph_ego_graph @@ -135,11 +137,7 @@ def ego_graph(input_graph, n, radius=1, center=True): n = dask_cudf.from_cudf(n, npartitions=min(input_graph._npartitions, len(n))) n = n.astype(n_type) - n = get_distributed_data(n) - wait(n) - - n = n.worker_to_parts - + n = persist_dask_df_equal_parts_per_worker(n, client, return_type="dict") do_expensive_check = False result = [ @@ -147,13 +145,13 @@ def ego_graph(input_graph, n, radius=1, center=True): _call_ego_graph, Comms.get_session_id(), input_graph._plc_graph[w], - n[w][0], + n_[0] if n_ else cudf.Series(dtype=n_type), radius, do_expensive_check, workers=[w], allow_other_workers=False, ) - for w in Comms.get_workers() + for w, n_ in n.items() ] wait(result) diff --git a/python/cugraph/cugraph/dask/community/induced_subgraph.py b/python/cugraph/cugraph/dask/community/induced_subgraph.py index 5d902f667a4..d079bcaf653 100644 --- a/python/cugraph/cugraph/dask/community/induced_subgraph.py +++ b/python/cugraph/cugraph/dask/community/induced_subgraph.py @@ -19,7 +19,9 @@ import dask_cudf import cudf import cupy as cp -from cugraph.dask.common.input_utils import get_distributed_data +from cugraph.dask.common.part_utils import ( + persist_dask_df_equal_parts_per_worker, +) from typing import Union, Tuple from pylibcugraph import ( @@ -154,15 +156,12 @@ def induced_subgraph( vertices_type = input_graph.input_df.dtypes[0] if 
isinstance(vertices, (cudf.Series, cudf.DataFrame)): - vertices = dask_cudf.from_cudf( - vertices, npartitions=min(input_graph._npartitions, len(vertices)) - ) + vertices = dask_cudf.from_cudf(vertices, npartitions=input_graph._npartitions) vertices = vertices.astype(vertices_type) - vertices = get_distributed_data(vertices) - wait(vertices) - - vertices = vertices.worker_to_parts + vertices = persist_dask_df_equal_parts_per_worker( + vertices, client, return_type="dict" + ) do_expensive_check = False @@ -171,13 +170,13 @@ def induced_subgraph( _call_induced_subgraph, Comms.get_session_id(), input_graph._plc_graph[w], - vertices[w][0], + vertices_[0] if vertices_ else cudf.Series(dtype=vertices_type), offsets, do_expensive_check, workers=[w], allow_other_workers=False, ) - for w in Comms.get_workers() + for w, vertices_ in vertices.items() ] wait(result) diff --git a/python/cugraph/cugraph/dask/community/leiden.py b/python/cugraph/cugraph/dask/community/leiden.py index 67bd0876ce6..10a266ed519 100644 --- a/python/cugraph/cugraph/dask/community/leiden.py +++ b/python/cugraph/cugraph/dask/community/leiden.py @@ -125,9 +125,19 @@ def leiden( Examples -------- - >>> from cugraph.datasets import karate - >>> G = karate.get_graph(fetch=True) - >>> parts, modularity_score = cugraph.leiden(G) + >>> import cugraph.dask as dcg + >>> import dask_cudf + >>> # ... Init a DASK Cluster + >>> # see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html + >>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/.. + >>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv") + >>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv", + ... chunksize=chunksize, delimiter=" ", + ... names=["src", "dst", "value"], + ... 
dtype=["int32", "int32", "float32"]) + >>> dg = cugraph.Graph() + >>> dg.from_dask_cudf_edgelist(ddf, source='src', destination='dst') + >>> parts, modularity_score = dcg.leiden(dg) """ diff --git a/python/cugraph/cugraph/dask/community/louvain.py b/python/cugraph/cugraph/dask/community/louvain.py index 1b091817a1a..e83d41811ea 100644 --- a/python/cugraph/cugraph/dask/community/louvain.py +++ b/python/cugraph/cugraph/dask/community/louvain.py @@ -129,9 +129,19 @@ def louvain( Examples -------- - >>> from cugraph.datasets import karate - >>> G = karate.get_graph(fetch=True) - >>> parts = cugraph.louvain(G) + >>> import cugraph.dask as dcg + >>> import dask_cudf + >>> # ... Init a DASK Cluster + >>> # see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html + >>> # Download dataset from https://github.com/rapidsai/cugraph/datasets/.. + >>> chunksize = dcg.get_chunksize(datasets_path / "karate.csv") + >>> ddf = dask_cudf.read_csv(datasets_path / "karate.csv", + ... chunksize=chunksize, delimiter=" ", + ... names=["src", "dst", "value"], + ... 
dtype=["int32", "int32", "float32"]) + >>> dg = cugraph.Graph() + >>> dg.from_dask_cudf_edgelist(ddf, source='src', destination='dst') + >>> parts, modularity_score = dcg.louvain(dg) """ diff --git a/python/cugraph/cugraph/dask/link_analysis/pagerank.py b/python/cugraph/cugraph/dask/link_analysis/pagerank.py index 2dfd25fa522..1dffb3cba78 100644 --- a/python/cugraph/cugraph/dask/link_analysis/pagerank.py +++ b/python/cugraph/cugraph/dask/link_analysis/pagerank.py @@ -28,7 +28,9 @@ ) import cugraph.dask.comms.comms as Comms -from cugraph.dask.common.input_utils import get_distributed_data +from cugraph.dask.common.part_utils import ( + persist_dask_df_equal_parts_per_worker, +) from cugraph.exceptions import FailedToConvergeError @@ -352,7 +354,14 @@ def pagerank( personalization, npartitions=len(Comms.get_workers()) ) - data_prsztn = get_distributed_data(personalization_ddf) + data_prsztn = persist_dask_df_equal_parts_per_worker( + personalization_ddf, client, return_type="dict" + ) + + empty_df = cudf.DataFrame(columns=list(personalization_ddf.columns)) + empty_df = empty_df.astype( + dict(zip(personalization_ddf.columns, personalization_ddf.dtypes)) + ) result = [ client.submit( @@ -361,7 +370,7 @@ def pagerank( input_graph._plc_graph[w], precomputed_vertex_out_weight_vertices, precomputed_vertex_out_weight_sums, - data_personalization[0], + data_personalization[0] if data_personalization else empty_df, initial_guess_vertices, initial_guess_values, alpha, @@ -372,7 +381,7 @@ def pagerank( workers=[w], allow_other_workers=False, ) - for w, data_personalization in data_prsztn.worker_to_parts.items() + for w, data_personalization in data_prsztn.items() ] else: result = [ diff --git a/python/cugraph/cugraph/dask/sampling/random_walks.py b/python/cugraph/cugraph/dask/sampling/random_walks.py index 993544ac45c..bb9baf2c92c 100644 --- a/python/cugraph/cugraph/dask/sampling/random_walks.py +++ b/python/cugraph/cugraph/dask/sampling/random_walks.py @@ -16,6 +16,9 @@ 
import dask_cudf import cudf import operator as op +from cugraph.dask.common.part_utils import ( + persist_dask_df_equal_parts_per_worker, +) from pylibcugraph import ResourceHandle @@ -24,7 +27,6 @@ ) from cugraph.dask.comms import comms as Comms -from cugraph.dask.common.input_utils import get_distributed_data def convert_to_cudf(cp_paths, number_map=None, is_vertex_paths=False): @@ -104,7 +106,7 @@ def random_walks( max_path_length : int The maximum path length """ - + client = default_client() if isinstance(start_vertices, int): start_vertices = [start_vertices] @@ -126,23 +128,21 @@ def random_walks( start_vertices, npartitions=min(input_graph._npartitions, len(start_vertices)) ) start_vertices = start_vertices.astype(start_vertices_type) - start_vertices = get_distributed_data(start_vertices) - wait(start_vertices) - start_vertices = start_vertices.worker_to_parts - - client = default_client() + start_vertices = persist_dask_df_equal_parts_per_worker( + start_vertices, client, return_type="dict" + ) result = [ client.submit( _call_plc_uniform_random_walks, Comms.get_session_id(), input_graph._plc_graph[w], - start_vertices[w][0], + start_v[0] if start_v else cudf.Series(dtype=start_vertices_type), max_depth, workers=[w], allow_other_workers=False, ) - for w in Comms.get_workers() + for w, start_v in start_vertices.items() ] wait(result) diff --git a/python/cugraph/cugraph/dask/traversal/bfs.py b/python/cugraph/cugraph/dask/traversal/bfs.py index cf467aaa18f..412fd851ad6 100644 --- a/python/cugraph/cugraph/dask/traversal/bfs.py +++ b/python/cugraph/cugraph/dask/traversal/bfs.py @@ -16,7 +16,9 @@ from pylibcugraph import ResourceHandle, bfs as pylibcugraph_bfs from dask.distributed import wait, default_client -from cugraph.dask.common.input_utils import get_distributed_data +from cugraph.dask.common.part_utils import ( + persist_dask_df_equal_parts_per_worker, +) import cugraph.dask.comms.comms as Comms import cudf import dask_cudf @@ -159,8 +161,13 @@ def 
bfs(input_graph, start, depth_limit=None, return_distances=True, check_start tmp_col_names = None start = input_graph.lookup_internal_vertex_id(start, tmp_col_names) + vertex_dtype = start.dtype # if the edgelist was renumbered, update + # the vertex type accordingly + + data_start = persist_dask_df_equal_parts_per_worker( + start, client, return_type="dict" + ) - data_start = get_distributed_data(start) do_expensive_check = False # FIXME: Why is 'direction_optimizing' not part of the python cugraph API # and why is it set to 'False' by default @@ -171,7 +178,7 @@ def bfs(input_graph, start, depth_limit=None, return_distances=True, check_start _call_plc_bfs, Comms.get_session_id(), input_graph._plc_graph[w], - st[0], + st[0] if st else cudf.Series(dtype=vertex_dtype), depth_limit, direction_optimizing, return_distances, @@ -179,7 +186,7 @@ def bfs(input_graph, start, depth_limit=None, return_distances=True, check_start workers=[w], allow_other_workers=False, ) - for w, st in data_start.worker_to_parts.items() + for w, st in data_start.items() ] wait(cupy_result) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py index 935d0c597d4..f666900b226 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -36,8 +36,8 @@ from cugraph.structure.symmetrize import symmetrize from cugraph.dask.common.part_utils import ( get_persisted_df_worker_map, - get_length_of_parts, persist_dask_df_equal_parts_per_worker, + _chunk_lst, ) from cugraph.dask import get_n_workers import cugraph.dask.comms.comms as Comms @@ -81,6 +81,10 @@ def __init__(self, properties): self.destination_columns = None self.weight_column = None self.vertex_columns = None + self.vertex_type = None + self.weight_type = None + self.edge_id_type = None + 
self.edge_type_id_type = None def _make_plc_graph( sID, @@ -89,51 +93,69 @@ def _make_plc_graph( src_col_name, dst_col_name, store_transposed, - num_edges, + vertex_type, + weight_type, + edge_id_type, + edge_type_id, ): - weights = None edge_ids = None edge_types = None - if simpleDistributedGraphImpl.edgeWeightCol in edata_x[0]: - weights = _get_column_from_ls_dfs( - edata_x, simpleDistributedGraphImpl.edgeWeightCol - ) - if weights.dtype == "int32": - weights = weights.astype("float32") - elif weights.dtype == "int64": - weights = weights.astype("float64") - - if simpleDistributedGraphImpl.edgeIdCol in edata_x[0]: - edge_ids = _get_column_from_ls_dfs( - edata_x, simpleDistributedGraphImpl.edgeIdCol - ) - if edata_x[0][src_col_name].dtype == "int64" and edge_ids.dtype != "int64": - edge_ids = edge_ids.astype("int64") + num_arrays = len(edata_x) + if weight_type is not None: + weights = [ + edata_x[i][simpleDistributedGraphImpl.edgeWeightCol] + for i in range(num_arrays) + ] + if weight_type == "int32": + weights = [w_array.astype("float32") for w_array in weights] + elif weight_type == "int64": + weights = [w_array.astype("float64") for w_array in weights] + + if edge_id_type is not None: + edge_ids = [ + edata_x[i][simpleDistributedGraphImpl.edgeIdCol] + for i in range(num_arrays) + ] + if vertex_type == "int64" and edge_id_type != "int64": + edge_ids = [e_id_array.astype("int64") for e_id_array in edge_ids] warnings.warn( - f"Vertex type is int64 but edge id type is {edge_ids.dtype}" + f"Vertex type is int64 but edge id type is {edge_ids[0].dtype}" ", automatically casting edge id type to int64. " "This may cause extra memory usage. Consider passing" " a int64 list of edge ids instead." 
) - if simpleDistributedGraphImpl.edgeTypeCol in edata_x[0]: - edge_types = _get_column_from_ls_dfs( - edata_x, simpleDistributedGraphImpl.edgeTypeCol - ) + if edge_type_id is not None: + edge_types = [ + edata_x[i][simpleDistributedGraphImpl.edgeTypeCol] + for i in range(num_arrays) + ] - return MGGraph( + src_array = [edata_x[i][src_col_name] for i in range(num_arrays)] + dst_array = [edata_x[i][dst_col_name] for i in range(num_arrays)] + plc_graph = MGGraph( resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), graph_properties=graph_props, - src_array=_get_column_from_ls_dfs(edata_x, src_col_name), - dst_array=_get_column_from_ls_dfs(edata_x, dst_col_name), - weight_array=weights, - edge_id_array=edge_ids, - edge_type_array=edge_types, + src_array=src_array if src_array else cudf.Series(dtype=vertex_type), + dst_array=dst_array if dst_array else cudf.Series(dtype=vertex_type), + weight_array=weights + if weights + else ([cudf.Series(dtype=weight_type)] if weight_type else None), + edge_id_array=edge_ids + if edge_ids + else ([cudf.Series(dtype=edge_id_type)] if edge_id_type else None), + edge_type_array=edge_types + if edge_types + else ([cudf.Series(dtype=edge_type_id)] if edge_type_id else None), + num_arrays=num_arrays, store_transposed=store_transposed, - num_edges=num_edges, do_expensive_check=False, ) + del edata_x + gc.collect() + + return plc_graph # Functions def __from_edgelist( @@ -182,7 +204,6 @@ def __from_edgelist( workers = _client.scheduler_info()["workers"] # Repartition to 2 partitions per GPU for memory efficient process input_ddf = input_ddf.repartition(npartitions=len(workers) * 2) - input_ddf = input_ddf.map_partitions(lambda df: df.copy()) # The dataframe will be symmetrized iff the graph is undirected # otherwise, the inital dataframe will be returned if edge_attr is not None: @@ -314,19 +335,25 @@ def __from_edgelist( dst_col_name = self.renumber_map.renumbered_dst_col_name ddf = self.edgelist.edgelist_df + + # Get the 
edgelist dtypes + self.vertex_type = ddf[src_col_name].dtype + if simpleDistributedGraphImpl.edgeWeightCol in ddf.columns: + self.weight_type = ddf[simpleDistributedGraphImpl.edgeWeightCol].dtype + if simpleDistributedGraphImpl.edgeIdCol in ddf.columns: + self.edge_id_type = ddf[simpleDistributedGraphImpl.edgeIdCol].dtype + if simpleDistributedGraphImpl.edgeTypeCol in ddf.columns: + self.edge_type_id_type = ddf[simpleDistributedGraphImpl.edgeTypeCol].dtype + graph_props = GraphProperties( is_multigraph=self.properties.multi_edge, is_symmetric=not self.properties.directed, ) ddf = ddf.repartition(npartitions=len(workers) * 2) - persisted_keys_d = persist_dask_df_equal_parts_per_worker( - ddf, _client, return_type="dict" - ) - del ddf - length_of_parts = get_length_of_parts(persisted_keys_d, _client) - num_edges = sum( - [item for sublist in length_of_parts.values() for item in sublist] - ) + ddf_keys = ddf.to_delayed() + workers = _client.scheduler_info()["workers"].keys() + ddf_keys_ls = _chunk_lst(ddf_keys, len(workers)) + delayed_tasks_d = { w: delayed(simpleDistributedGraphImpl._make_plc_graph)( Comms.get_session_id(), @@ -335,9 +362,12 @@ def __from_edgelist( src_col_name, dst_col_name, store_transposed, - num_edges, + self.vertex_type, + self.weight_type, + self.edge_id_type, + self.edge_type_id_type, ) - for w, edata in persisted_keys_d.items() + for w, edata in zip(workers, ddf_keys_ls) } self._plc_graph = { w: _client.compute( @@ -346,8 +376,9 @@ def __from_edgelist( for w, delayed_task in delayed_tasks_d.items() } wait(list(self._plc_graph.values())) - del persisted_keys_d + del ddf_keys del delayed_tasks_d + gc.collect() _client.run(gc.collect) @property @@ -1189,18 +1220,3 @@ def vertex_column_size(self): @property def _npartitions(self) -> int: return len(self._plc_graph) - - -def _get_column_from_ls_dfs(lst_df, col_name): - """ - This function concatenates the column - and drops it from the input list - """ - len_df = sum([len(df) for df in lst_df]) - 
if len_df == 0: - return lst_df[0][col_name] - output_col = cudf.concat([df[col_name] for df in lst_df], ignore_index=True) - for df in lst_df: - df.drop(columns=[col_name], inplace=True) - gc.collect() - return output_col diff --git a/python/cugraph/cugraph/structure/symmetrize.py b/python/cugraph/cugraph/structure/symmetrize.py index 4c00e68344d..b324ff65834 100644 --- a/python/cugraph/cugraph/structure/symmetrize.py +++ b/python/cugraph/cugraph/structure/symmetrize.py @@ -299,10 +299,8 @@ def _memory_efficient_drop_duplicates(ddf, vertex_col_name, num_workers): Drop duplicate edges from the input dataframe. """ # drop duplicates has a 5x+ overhead - # and does not seem to be working as expected - # TODO: Triage an MRE ddf = ddf.reset_index(drop=True).repartition(npartitions=num_workers * 2) - ddf = ddf.groupby(by=[*vertex_col_name], as_index=False).min( - split_out=num_workers * 2 + ddf = ddf.drop_duplicates( + subset=[*vertex_col_name], ignore_index=True, split_out=num_workers * 2 ) return ddf diff --git a/python/cugraph/cugraph/testing/mg_utils.py b/python/cugraph/cugraph/testing/mg_utils.py index bd165ba3db5..32854652f05 100644 --- a/python/cugraph/cugraph/testing/mg_utils.py +++ b/python/cugraph/cugraph/testing/mg_utils.py @@ -33,6 +33,7 @@ def start_dask_client( rmm_pool_size=None, dask_worker_devices=None, jit_unspill=False, + worker_class=None, device_memory_limit=0.8, ): """ @@ -141,6 +142,7 @@ def start_dask_client( rmm_async=rmm_async, CUDA_VISIBLE_DEVICES=dask_worker_devices, jit_unspill=jit_unspill, + worker_class=worker_class, device_memory_limit=device_memory_limit, ) client = Client(cluster) diff --git a/python/cugraph/cugraph/tests/conftest.py b/python/cugraph/cugraph/tests/conftest.py index 916e445cfdb..cb5755128eb 100644 --- a/python/cugraph/cugraph/tests/conftest.py +++ b/python/cugraph/cugraph/tests/conftest.py @@ -20,6 +20,9 @@ import os import tempfile +# Avoid timeout during shutdown +from dask_cuda.utils_test import 
IncreasedCloseTimeoutNanny + # module-wide fixtures @@ -40,7 +43,9 @@ def dask_client(): # start_dask_client will check for the SCHEDULER_FILE and # DASK_WORKER_DEVICES env vars and use them when creating a client if # set. start_dask_client will also initialize the Comms singleton. - dask_client, dask_cluster = start_dask_client() + dask_client, dask_cluster = start_dask_client( + worker_class=IncreasedCloseTimeoutNanny + ) yield dask_client diff --git a/python/cugraph/cugraph/tests/link_analysis/test_hits_mg.py b/python/cugraph/cugraph/tests/link_analysis/test_hits_mg.py index 5590eb17401..73ec13c674c 100644 --- a/python/cugraph/cugraph/tests/link_analysis/test_hits_mg.py +++ b/python/cugraph/cugraph/tests/link_analysis/test_hits_mg.py @@ -45,7 +45,7 @@ def setup_function(): fixture_params = gen_fixture_params_product( (datasets, "graph_file"), ([50], "max_iter"), - ([1.0e-6], "tol"), + ([1.0e-4], "tol"), # FIXME: Temporarily lower tolerance (IS_DIRECTED, "directed"), ) diff --git a/python/cugraph/cugraph/tests/structure/test_graph_mg.py b/python/cugraph/cugraph/tests/structure/test_graph_mg.py index 3024e50402a..7837916ae53 100644 --- a/python/cugraph/cugraph/tests/structure/test_graph_mg.py +++ b/python/cugraph/cugraph/tests/structure/test_graph_mg.py @@ -30,6 +30,9 @@ from cugraph.dask.traversal.bfs import convert_to_cudf from cugraph.dask.common.input_utils import get_distributed_data from pylibcugraph.testing.utils import gen_fixture_params_product +from cugraph.dask.common.part_utils import ( + persist_dask_df_equal_parts_per_worker, +) # ============================================================================= @@ -141,10 +144,13 @@ def test_create_mg_graph(dask_client, input_combo): assert len(G._plc_graph) == len(dask_client.has_what()) start = dask_cudf.from_cudf(cudf.Series([1], dtype="int32"), len(G._plc_graph)) + vertex_dtype = start.dtype if G.renumbered: start = G.lookup_internal_vertex_id(start, None) - data_start = get_distributed_data(start) 
+ data_start = persist_dask_df_equal_parts_per_worker( + start, dask_client, return_type="dict" + ) res = [ dask_client.submit( @@ -159,10 +165,10 @@ def test_create_mg_graph(dask_client, input_combo): ), Comms.get_session_id(), G._plc_graph[w], - data_start.worker_to_parts[w][0], + st[0] if st else cudf.Series(dtype=vertex_dtype), workers=[w], ) - for w in Comms.get_workers() + for w, st in data_start.items() ] wait(res) diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/graph.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/graph.pxd index 590c5679264..28a9f5a3be5 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/graph.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/graph.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -51,12 +51,38 @@ cdef extern from "cugraph_c/graph.h": bool_t check, cugraph_graph_t** graph, cugraph_error_t** error) + + # Supports isolated vertices + cdef cugraph_error_code_t \ + cugraph_graph_create_sg( + const cugraph_resource_handle_t* handle, + const cugraph_graph_properties_t* properties, + const cugraph_type_erased_device_array_view_t* vertices, + const cugraph_type_erased_device_array_view_t* src, + const cugraph_type_erased_device_array_view_t* dst, + const cugraph_type_erased_device_array_view_t* weights, + const cugraph_type_erased_device_array_view_t* edge_ids, + const cugraph_type_erased_device_array_view_t* edge_types, + bool_t store_transposed, + bool_t renumber, + bool_t drop_self_loops, + bool_t drop_multi_edges, + bool_t check, + cugraph_graph_t** graph, + cugraph_error_t** error) # This may get renamed to cugraph_graph_free() cdef void \ cugraph_sg_graph_free( cugraph_graph_t* graph ) + + # FIXME: Might want to delete 'cugraph_sg_graph_free' and replace + # 
'cugraph_mg_graph_free' by 'cugraph_graph_free' + cdef void \ + cugraph_graph_free( + cugraph_graph_t* graph + ) cdef cugraph_error_code_t \ cugraph_mg_graph_create( @@ -96,6 +122,22 @@ cdef extern from "cugraph_c/graph.h": cugraph_error_t** error ) + cdef cugraph_error_code_t \ + cugraph_graph_create_sg_from_csr( + const cugraph_resource_handle_t* handle, + const cugraph_graph_properties_t* properties, + const cugraph_type_erased_device_array_view_t* offsets, + const cugraph_type_erased_device_array_view_t* indices, + const cugraph_type_erased_device_array_view_t* weights, + const cugraph_type_erased_device_array_view_t* edge_ids, + const cugraph_type_erased_device_array_view_t* edge_type_ids, + bool_t store_transposed, + bool_t renumber, + bool_t check, + cugraph_graph_t** graph, + cugraph_error_t** error + ) + cdef void \ cugraph_sg_graph_free( cugraph_graph_t* graph @@ -117,6 +159,24 @@ cdef extern from "cugraph_c/graph.h": cugraph_error_t** error ) + cdef cugraph_error_code_t \ + cugraph_graph_create_mg( + const cugraph_resource_handle_t* handle, + const cugraph_graph_properties_t* properties, + const cugraph_type_erased_device_array_view_t** vertices, + const cugraph_type_erased_device_array_view_t** src, + const cugraph_type_erased_device_array_view_t** dst, + const cugraph_type_erased_device_array_view_t** weights, + const cugraph_type_erased_device_array_view_t** edge_ids, + const cugraph_type_erased_device_array_view_t** edge_type_ids, + bool_t store_transposed, + size_t num_arrays, + bool_t drop_self_loops, + bool_t drop_multi_edges, + bool_t do_expensive_check, + cugraph_graph_t** graph, + cugraph_error_t** error) + cdef void \ cugraph_mg_graph_free( cugraph_graph_t* graph diff --git a/python/pylibcugraph/pylibcugraph/analyze_clustering_edge_cut.pyx b/python/pylibcugraph/pylibcugraph/analyze_clustering_edge_cut.pyx index 60613f27a0d..3370e71f469 100644 --- a/python/pylibcugraph/pylibcugraph/analyze_clustering_edge_cut.pyx +++ 
b/python/pylibcugraph/pylibcugraph/analyze_clustering_edge_cut.pyx @@ -86,7 +86,7 @@ def analyze_clustering_edge_cut(ResourceHandle resource_handle, >>> graph_props = pylibcugraph.GraphProperties( ... is_symmetric=True, is_multigraph=False) >>> G = pylibcugraph.SGGraph( - ... resource_handle, graph_props, srcs, dsts, weights, + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, ... store_transposed=True, renumber=False, do_expensive_check=False) >>> (vertex, cluster) = pylibcugraph.spectral_modularity_maximization( ... resource_handle, G, num_clusters=5, num_eigen_vects=2, evs_tolerance=0.00001 diff --git a/python/pylibcugraph/pylibcugraph/analyze_clustering_modularity.pyx b/python/pylibcugraph/pylibcugraph/analyze_clustering_modularity.pyx index 76ba48f52b7..2e7c1d2f649 100644 --- a/python/pylibcugraph/pylibcugraph/analyze_clustering_modularity.pyx +++ b/python/pylibcugraph/pylibcugraph/analyze_clustering_modularity.pyx @@ -87,7 +87,7 @@ def analyze_clustering_modularity(ResourceHandle resource_handle, >>> graph_props = pylibcugraph.GraphProperties( ... is_symmetric=True, is_multigraph=False) >>> G = pylibcugraph.SGGraph( - ... resource_handle, graph_props, srcs, dsts, weights, + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, ... store_transposed=True, renumber=False, do_expensive_check=False) >>> (vertex, cluster) = pylibcugraph.spectral_modularity_maximization( ... resource_handle, G, num_clusters=5, num_eigen_vects=2, evs_tolerance=0.00001 diff --git a/python/pylibcugraph/pylibcugraph/analyze_clustering_ratio_cut.pyx b/python/pylibcugraph/pylibcugraph/analyze_clustering_ratio_cut.pyx index 39b317e107d..c06f870d048 100644 --- a/python/pylibcugraph/pylibcugraph/analyze_clustering_ratio_cut.pyx +++ b/python/pylibcugraph/pylibcugraph/analyze_clustering_ratio_cut.pyx @@ -86,7 +86,7 @@ def analyze_clustering_ratio_cut(ResourceHandle resource_handle, >>> graph_props = pylibcugraph.GraphProperties( ... 
is_symmetric=True, is_multigraph=False) >>> G = pylibcugraph.SGGraph( - ... resource_handle, graph_props, srcs, dsts, weights, + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, ... store_transposed=True, renumber=False, do_expensive_check=False) >>> (vertex, cluster) = pylibcugraph.spectral_modularity_maximization( ... resource_handle, G, num_clusters=5, num_eigen_vects=2, evs_tolerance=0.00001 diff --git a/python/pylibcugraph/pylibcugraph/balanced_cut_clustering.pyx b/python/pylibcugraph/pylibcugraph/balanced_cut_clustering.pyx index 5a61f9e0dd7..a1a5c8182eb 100644 --- a/python/pylibcugraph/pylibcugraph/balanced_cut_clustering.pyx +++ b/python/pylibcugraph/pylibcugraph/balanced_cut_clustering.pyx @@ -109,7 +109,7 @@ def balanced_cut_clustering(ResourceHandle resource_handle, >>> graph_props = pylibcugraph.GraphProperties( ... is_symmetric=True, is_multigraph=False) >>> G = pylibcugraph.SGGraph( - ... resource_handle, graph_props, srcs, dsts, weights, + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, ... store_transposed=True, renumber=False, do_expensive_check=False) >>> (vertices, clusters) = pylibcugraph.balanced_cut_clustering( ... resource_handle, G, num_clusters=5, num_eigen_vects=2, evs_tolerance=0.00001 diff --git a/python/pylibcugraph/pylibcugraph/ecg.pyx b/python/pylibcugraph/pylibcugraph/ecg.pyx index c5c1fe2eda7..4188aaa213e 100644 --- a/python/pylibcugraph/pylibcugraph/ecg.pyx +++ b/python/pylibcugraph/pylibcugraph/ecg.pyx @@ -101,7 +101,7 @@ def ecg(ResourceHandle resource_handle, >>> graph_props = pylibcugraph.GraphProperties( ... is_symmetric=True, is_multigraph=False) >>> G = pylibcugraph.SGGraph( - ... resource_handle, graph_props, srcs, dsts, weights, + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, ... 
store_transposed=True, renumber=False, do_expensive_check=False) >>> (vertices, clusters) = pylibcugraph.ecg(resource_handle, G) # FIXME: Check this docstring example diff --git a/python/pylibcugraph/pylibcugraph/edge_betweenness_centrality.pyx b/python/pylibcugraph/pylibcugraph/edge_betweenness_centrality.pyx index c88c9fe8a67..e1dae1ff10a 100644 --- a/python/pylibcugraph/pylibcugraph/edge_betweenness_centrality.pyx +++ b/python/pylibcugraph/pylibcugraph/edge_betweenness_centrality.pyx @@ -180,7 +180,7 @@ def edge_betweenness_centrality(ResourceHandle resource_handle, cdef cugraph_type_erased_device_array_view_t* values_ptr = \ cugraph_edge_centrality_result_get_values(result_ptr) - if graph.edge_id_view_ptr is NULL: + if graph.edge_id_view_ptr is NULL and graph.edge_id_view_ptr_ptr is NULL: cupy_edge_ids = None else: edge_ids_ptr = cugraph_edge_centrality_result_get_edge_ids(result_ptr) diff --git a/python/pylibcugraph/pylibcugraph/egonet.pyx b/python/pylibcugraph/pylibcugraph/egonet.pyx index d011d946e46..e7237cc3ba4 100644 --- a/python/pylibcugraph/pylibcugraph/egonet.pyx +++ b/python/pylibcugraph/pylibcugraph/egonet.pyx @@ -101,7 +101,7 @@ def ego_graph(ResourceHandle resource_handle, >>> graph_props = pylibcugraph.GraphProperties( ... is_symmetric=False, is_multigraph=False) >>> G = pylibcugraph.SGGraph( - ... resource_handle, graph_props, srcs, dsts, weights, + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, ... store_transposed=False, renumber=False, do_expensive_check=False) >>> (sources, destinations, edge_weights, subgraph_offsets) = ... 
pylibcugraph.ego_graph(resource_handle, G, source_vertices, 2, False) diff --git a/python/pylibcugraph/pylibcugraph/eigenvector_centrality.pyx b/python/pylibcugraph/pylibcugraph/eigenvector_centrality.pyx index 88612c242e2..568f072ee3d 100644 --- a/python/pylibcugraph/pylibcugraph/eigenvector_centrality.pyx +++ b/python/pylibcugraph/pylibcugraph/eigenvector_centrality.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -97,7 +97,7 @@ def eigenvector_centrality(ResourceHandle resource_handle, >>> graph_props = pylibcugraph.GraphProperties( ... is_symmetric=False, is_multigraph=False) >>> G = pylibcugraph.SGGraph( - ... resource_handle, graph_props, srcs, dsts, weights, + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, ... store_transposed=True, renumber=False, do_expensive_check=False) >>> (vertices, values) = pylibcugraph.eigenvector_centrality( resource_handle, G, 1e-6, 1000, False) diff --git a/python/pylibcugraph/pylibcugraph/graphs.pxd b/python/pylibcugraph/pylibcugraph/graphs.pxd index a2df44ba26e..dac69e0ad04 100644 --- a/python/pylibcugraph/pylibcugraph/graphs.pxd +++ b/python/pylibcugraph/pylibcugraph/graphs.pxd @@ -25,11 +25,13 @@ from pylibcugraph._cugraph_c.graph cimport ( cdef class _GPUGraph: cdef cugraph_graph_t* c_graph_ptr cdef cugraph_type_erased_device_array_view_t* edge_id_view_ptr - cdef cugraph_type_erased_device_array_view_t* weights_view_ptr + cdef cugraph_type_erased_device_array_view_t** edge_id_view_ptr_ptr + cdef cugraph_type_erased_device_array_view_t* weights_view_ptr + cdef cugraph_type_erased_device_array_view_t** weights_view_ptr_ptr cdef class SGGraph(_GPUGraph): pass cdef class MGGraph(_GPUGraph): - pass + pass diff --git a/python/pylibcugraph/pylibcugraph/graphs.pyx 
b/python/pylibcugraph/pylibcugraph/graphs.pyx index 33a8a09c6f4..b3065fa0684 100644 --- a/python/pylibcugraph/pylibcugraph/graphs.pyx +++ b/python/pylibcugraph/pylibcugraph/graphs.pyx @@ -18,16 +18,20 @@ from pylibcugraph._cugraph_c.error cimport ( cugraph_error_code_t, cugraph_error_t, ) +from cython.operator cimport dereference as deref from pylibcugraph._cugraph_c.array cimport ( cugraph_type_erased_device_array_view_t, cugraph_type_erased_device_array_view_free, ) from pylibcugraph._cugraph_c.graph cimport ( - cugraph_sg_graph_create, - cugraph_mg_graph_create, - cugraph_sg_graph_create_from_csr, - cugraph_sg_graph_free, - cugraph_mg_graph_free, + cugraph_graph_create_sg, + cugraph_graph_create_mg, + cugraph_sg_graph_create_from_csr, #FIXME: Remove this once + # 'cugraph_graph_create_sg_from_csr' is exposed + cugraph_graph_create_sg_from_csr, + cugraph_sg_graph_free, #FIXME: Remove this + cugraph_graph_free, + cugraph_mg_graph_free, #FIXME: Remove this ) from pylibcugraph.resource_handle cimport ( ResourceHandle, @@ -38,8 +42,11 @@ from pylibcugraph.graph_properties cimport ( from pylibcugraph.utils cimport ( assert_success, assert_CAI_type, + get_c_type_from_numpy_type, create_cugraph_type_erased_device_array_view_from_py_obj, ) +from libc.stdlib cimport malloc + cdef class SGGraph(_GPUGraph): @@ -70,6 +77,9 @@ cdef class SGGraph(_GPUGraph): CSR format. In the case of a COO, The order of the array corresponds to the ordering of the src_offset_array, where the ith item in src_offset_array and the ith item in dst_index_array define the ith edge of the graph. + + vertices_array : device array type + Device array containing the isolated vertices of the graph. weight_array : device array type Device array containing the weight values of each directed edge. 
The @@ -105,6 +115,12 @@ cdef class SGGraph(_GPUGraph): COO: arrays represent src_array and dst_array CSR: arrays represent offset_array and index_array + drop_self_loops : bool, optional (default='False') + If true, drop any self loops that exist in the provided edge list. + + drop_multi_edges: bool, optional (default='False') + If true, drop any multi edges that exist in the provided edge list + Examples --------- >>> import pylibcugraph, cupy, numpy @@ -116,7 +132,7 @@ cdef class SGGraph(_GPUGraph): >>> graph_props = pylibcugraph.GraphProperties( ... is_symmetric=False, is_multigraph=False) >>> G = pylibcugraph.SGGraph( - ... resource_handle, graph_props, srcs, dsts, weights, + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, ... store_transposed=False, renumber=False, do_expensive_check=False) """ @@ -131,7 +147,10 @@ cdef class SGGraph(_GPUGraph): do_expensive_check=False, edge_id_array=None, edge_type_array=None, - input_array_format="COO"): + input_array_format="COO", + vertices_array=None, + drop_self_loops=False, + drop_multi_edges=False): # FIXME: add tests for these if not(isinstance(store_transposed, (int, bool))): @@ -145,13 +164,13 @@ cdef class SGGraph(_GPUGraph): f"{type(do_expensive_check)}") assert_CAI_type(src_or_offset_array, "src_or_offset_array") assert_CAI_type(dst_or_index_array, "dst_or_index_array") + assert_CAI_type(vertices_array, "vertices_array", True) assert_CAI_type(weight_array, "weight_array", True) - if edge_id_array is not None: - assert_CAI_type(edge_id_array, "edge_id_array") - if edge_type_array is not None: - assert_CAI_type(edge_type_array, "edge_type_array") + assert_CAI_type(edge_id_array, "edge_id_array", True) + assert_CAI_type(edge_type_array, "edge_type_array", True) - # FIXME: assert that src_or_offset_array and dst_or_index_array have the same type + # FIXME: assert that src_or_offset_array and dst_or_index_array have + # the same type cdef cugraph_error_t* error_ptr cdef cugraph_error_code_t 
error_code @@ -159,31 +178,31 @@ cdef class SGGraph(_GPUGraph): cdef cugraph_type_erased_device_array_view_t* srcs_or_offsets_view_ptr = \ create_cugraph_type_erased_device_array_view_from_py_obj( src_or_offset_array - ) - + ) cdef cugraph_type_erased_device_array_view_t* dsts_or_indices_view_ptr = \ create_cugraph_type_erased_device_array_view_from_py_obj( dst_or_index_array ) - - + cdef cugraph_type_erased_device_array_view_t* vertices_view_ptr = \ + create_cugraph_type_erased_device_array_view_from_py_obj( + vertices_array + ) self.weights_view_ptr = create_cugraph_type_erased_device_array_view_from_py_obj( weight_array ) - self.edge_id_view_ptr = create_cugraph_type_erased_device_array_view_from_py_obj( edge_id_array - ) - + ) cdef cugraph_type_erased_device_array_view_t* edge_type_view_ptr = \ create_cugraph_type_erased_device_array_view_from_py_obj( edge_type_array ) if input_array_format == "COO": - error_code = cugraph_sg_graph_create( + error_code = cugraph_graph_create_sg( resource_handle.c_resource_handle_ptr, &(graph_properties.c_graph_properties), + vertices_view_ptr, srcs_or_offsets_view_ptr, dsts_or_indices_view_ptr, self.weights_view_ptr, @@ -191,12 +210,13 @@ cdef class SGGraph(_GPUGraph): edge_type_view_ptr, store_transposed, renumber, + drop_self_loops, + drop_multi_edges, do_expensive_check, &(self.c_graph_ptr), &error_ptr) - assert_success(error_code, error_ptr, - "cugraph_sg_graph_create()") + "cugraph_graph_create_sg()") elif input_array_format == "CSR": error_code = cugraph_sg_graph_create_from_csr( @@ -209,6 +229,8 @@ cdef class SGGraph(_GPUGraph): edge_type_view_ptr, store_transposed, renumber, + # drop_self_loops, #FIXME: Not supported yet + # drop_multi_edges, #FIXME: Not supported yet do_expensive_check, &(self.c_graph_ptr), &error_ptr) @@ -223,7 +245,8 @@ cdef class SGGraph(_GPUGraph): cugraph_type_erased_device_array_view_free(srcs_or_offsets_view_ptr) cugraph_type_erased_device_array_view_free(dsts_or_indices_view_ptr) - 
cugraph_type_erased_device_array_view_free(self.weights_view_ptr) + if self.weights_view_ptr is not NULL: + cugraph_type_erased_device_array_view_free(self.weights_view_ptr) if self.edge_id_view_ptr is not NULL: cugraph_type_erased_device_array_view_free(self.edge_id_view_ptr) if edge_type_view_ptr is not NULL: @@ -259,6 +282,9 @@ cdef class MGGraph(_GPUGraph): each directed edge. The order of the array corresponds to the ordering of the src_array, where the ith item in src_array and the ith item in dst_array define the ith edge of the graph. + + vertices_array : device array type + Device array containing the isolated vertices of the graph. weight_array : device array type Device array containing the weight values of each directed edge. The @@ -270,8 +296,10 @@ cdef class MGGraph(_GPUGraph): Set to True if the graph should be transposed. This is required for some algorithms, such as pagerank. - num_edges : int - Number of edges + num_arrays : size_t + Number of arrays. + + If provided, all list of device arrays should be of the same size. do_expensive_check : bool If True, performs more extensive tests on the inputs to ensure @@ -286,6 +314,12 @@ cdef class MGGraph(_GPUGraph): Device array containing the edge types of each directed edge. Must match the ordering of the src/dst/edge_id arrays. Optional (may be null). If provided, edge_id_array must be provided. + + drop_self_loops : bool, optional (default='False') + If true, drop any self loops that exist in the provided edge list. 
+ + drop_multi_edges: bool, optional (default='False') + If true, drop any multi edges that exist in the provided edge list """ def __cinit__(self, ResourceHandle resource_handle, @@ -294,85 +328,156 @@ cdef class MGGraph(_GPUGraph): dst_array, weight_array=None, store_transposed=False, - num_edges=-1, - do_expensive_check=False, + do_expensive_check=False, # default to False edge_id_array=None, - edge_type_array=None): + edge_type_array=None, + vertices_array=None, + size_t num_arrays=1, # default value to not break users + drop_self_loops=False, + drop_multi_edges=False): - # FIXME: add tests for these if not(isinstance(store_transposed, (int, bool))): raise TypeError("expected int or bool for store_transposed, got " f"{type(store_transposed)}") - if not(isinstance(num_edges, (int))): - raise TypeError("expected int for num_edges, got " - f"{type(num_edges)}") - if num_edges < 0: - raise TypeError("num_edges must be > 0") + if not(isinstance(do_expensive_check, (int, bool))): raise TypeError("expected int or bool for do_expensive_check, got " f"{type(do_expensive_check)}") - assert_CAI_type(src_array, "src_array") - assert_CAI_type(dst_array, "dst_array") - assert_CAI_type(weight_array, "weight_array", True) - - assert_CAI_type(edge_id_array, "edge_id_array", True) - if edge_id_array is not None and len(edge_id_array) != len(src_array): - raise ValueError('Edge id array must be same length as edgelist') - - assert_CAI_type(edge_type_array, "edge_type_array", True) - if edge_type_array is not None and len(edge_type_array) != len(src_array): - raise ValueError('Edge type array must be same length as edgelist') - - # FIXME: assert that src_array and dst_array have the same type cdef cugraph_error_t* error_ptr cdef cugraph_error_code_t error_code - cdef cugraph_type_erased_device_array_view_t* srcs_view_ptr = \ - create_cugraph_type_erased_device_array_view_from_py_obj( - src_array - ) - cdef cugraph_type_erased_device_array_view_t* dsts_view_ptr = \ - 
create_cugraph_type_erased_device_array_view_from_py_obj( - dst_array - ) - self.weights_view_ptr = \ - create_cugraph_type_erased_device_array_view_from_py_obj( - weight_array - ) - self.edge_id_view_ptr = \ - create_cugraph_type_erased_device_array_view_from_py_obj( - edge_id_array - ) - cdef cugraph_type_erased_device_array_view_t* edge_type_view_ptr = \ - create_cugraph_type_erased_device_array_view_from_py_obj( - edge_type_array - ) - error_code = cugraph_mg_graph_create( + if not isinstance(src_array, list): + src_array = [src_array] + if not any(src_array): + src_array = src_array * num_arrays + + if not isinstance(dst_array, list): + dst_array = [dst_array] + if not any(dst_array): + dst_array = dst_array * num_arrays + + if not isinstance(weight_array, list): + weight_array = [weight_array] + if not any(weight_array): + weight_array = weight_array * num_arrays + + if not isinstance(edge_id_array, list): + edge_id_array = [edge_id_array] + if not any(edge_id_array): + edge_id_array = edge_id_array * num_arrays + + if not isinstance(edge_type_array, list): + edge_type_array = [edge_type_array] + if not any(edge_type_array): + edge_type_array = edge_type_array * num_arrays + + if not isinstance(vertices_array, list): + vertices_array = [vertices_array] + if not any(vertices_array): + vertices_array = vertices_array * num_arrays + + cdef cugraph_type_erased_device_array_view_t** srcs_view_ptr_ptr = NULL + cdef cugraph_type_erased_device_array_view_t** dsts_view_ptr_ptr = NULL + cdef cugraph_type_erased_device_array_view_t** vertices_view_ptr_ptr = NULL + cdef cugraph_type_erased_device_array_view_t** edge_type_view_ptr_ptr = NULL + + for i in range(num_arrays): + if do_expensive_check: + assert_CAI_type(src_array[i], "src_array") + assert_CAI_type(dst_array[i], "dst_array") + assert_CAI_type(weight_array[i], "weight_array", True) + assert_CAI_type(vertices_array[i], "vertices_array", True) + + assert_CAI_type(edge_id_array[i], "edge_id_array", True) + + if 
edge_id_array is not None and len(edge_id_array[i]) != len(src_array[i]): + raise ValueError('Edge id array must be same length as edgelist') + + assert_CAI_type(edge_type_array[i], "edge_type_array", True) + if edge_type_array[i] is not None and len(edge_type_array[i]) != len(src_array[i]): + raise ValueError('Edge type array must be same length as edgelist') + + if src_array[i] is not None: + if i == 0: + srcs_view_ptr_ptr = \ + malloc( + num_arrays * sizeof(cugraph_type_erased_device_array_view_t*)) + srcs_view_ptr_ptr[i] = \ + create_cugraph_type_erased_device_array_view_from_py_obj(src_array[i]) + + if dst_array[i] is not None: + if i == 0: + dsts_view_ptr_ptr = \ + malloc( + num_arrays * sizeof(cugraph_type_erased_device_array_view_t*)) + dsts_view_ptr_ptr[i] = \ + create_cugraph_type_erased_device_array_view_from_py_obj(dst_array[i]) + + if vertices_array[i] is not None: + if i == 0: + vertices_view_ptr_ptr = \ + malloc( + num_arrays * sizeof(cugraph_type_erased_device_array_view_t*)) + vertices_view_ptr_ptr[i] = \ + create_cugraph_type_erased_device_array_view_from_py_obj(vertices_array[i]) + + if weight_array[i] is not None: + if i == 0: + self.weights_view_ptr_ptr = \ + malloc( + num_arrays * sizeof(cugraph_type_erased_device_array_view_t*)) + self.weights_view_ptr_ptr[i] = \ + create_cugraph_type_erased_device_array_view_from_py_obj(weight_array[i]) + + if edge_id_array[i] is not None: + if i == 0: + self.edge_id_view_ptr_ptr = \ + malloc( + num_arrays * sizeof(cugraph_type_erased_device_array_view_t*)) + self.edge_id_view_ptr_ptr[i] = \ + create_cugraph_type_erased_device_array_view_from_py_obj(edge_id_array[i]) + + if edge_type_array[i] is not None: + if i == 0: + edge_type_view_ptr_ptr = \ + malloc( + num_arrays * sizeof(cugraph_type_erased_device_array_view_t*)) + edge_type_view_ptr_ptr[i] = \ + create_cugraph_type_erased_device_array_view_from_py_obj(edge_type_array[i]) + + error_code = cugraph_graph_create_mg( resource_handle.c_resource_handle_ptr, 
&(graph_properties.c_graph_properties), - srcs_view_ptr, - dsts_view_ptr, - self.weights_view_ptr, - self.edge_id_view_ptr, - edge_type_view_ptr, + vertices_view_ptr_ptr, + srcs_view_ptr_ptr, + dsts_view_ptr_ptr, + self.weights_view_ptr_ptr, + self.edge_id_view_ptr_ptr, + edge_type_view_ptr_ptr, store_transposed, - num_edges, + num_arrays, do_expensive_check, + drop_self_loops, + drop_multi_edges, &(self.c_graph_ptr), &error_ptr) assert_success(error_code, error_ptr, "cugraph_mg_graph_create()") - cugraph_type_erased_device_array_view_free(srcs_view_ptr) - cugraph_type_erased_device_array_view_free(dsts_view_ptr) - cugraph_type_erased_device_array_view_free(self.weights_view_ptr) - if self.edge_id_view_ptr is not NULL: - cugraph_type_erased_device_array_view_free(self.edge_id_view_ptr) - if edge_type_view_ptr is not NULL: - cugraph_type_erased_device_array_view_free(edge_type_view_ptr) + for i in range(num_arrays): + cugraph_type_erased_device_array_view_free(srcs_view_ptr_ptr[i]) + cugraph_type_erased_device_array_view_free(dsts_view_ptr_ptr[i]) + if vertices_view_ptr_ptr is not NULL: + cugraph_type_erased_device_array_view_free(vertices_view_ptr_ptr[i]) + if self.weights_view_ptr_ptr is not NULL: + cugraph_type_erased_device_array_view_free(self.weights_view_ptr_ptr[i]) + if self.edge_id_view_ptr_ptr is not NULL: + cugraph_type_erased_device_array_view_free(self.edge_id_view_ptr_ptr[i]) + if edge_type_view_ptr_ptr is not NULL: + cugraph_type_erased_device_array_view_free(edge_type_view_ptr_ptr[i]) def __dealloc__(self): if self.c_graph_ptr is not NULL: diff --git a/python/pylibcugraph/pylibcugraph/induced_subgraph.pyx b/python/pylibcugraph/pylibcugraph/induced_subgraph.pyx index aab36d3d5e0..99b89ec2a58 100644 --- a/python/pylibcugraph/pylibcugraph/induced_subgraph.pyx +++ b/python/pylibcugraph/pylibcugraph/induced_subgraph.pyx @@ -98,7 +98,7 @@ def induced_subgraph(ResourceHandle resource_handle, >>> graph_props = pylibcugraph.GraphProperties( ... 
is_symmetric=False, is_multigraph=False) >>> G = pylibcugraph.SGGraph( - ... resource_handle, graph_props, srcs, dsts, weights, + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, ... store_transposed=False, renumber=False, do_expensive_check=False) >>> (sources, destinations, edge_weights, subgraph_offsets) = ... pylibcugraph.induced_subgraph( diff --git a/python/pylibcugraph/pylibcugraph/k_truss_subgraph.pyx b/python/pylibcugraph/pylibcugraph/k_truss_subgraph.pyx index cc91e76dd55..2c22c618249 100644 --- a/python/pylibcugraph/pylibcugraph/k_truss_subgraph.pyx +++ b/python/pylibcugraph/pylibcugraph/k_truss_subgraph.pyx @@ -96,7 +96,7 @@ def k_truss_subgraph(ResourceHandle resource_handle, >>> graph_props = pylibcugraph.GraphProperties( ... is_symmetric=True, is_multigraph=False) >>> G = pylibcugraph.SGGraph( - ... resource_handle, graph_props, srcs, dsts, weights, + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, ... store_transposed=False, renumber=False, do_expensive_check=False) >>> (sources, destinations, edge_weights, subgraph_offsets) = ... pylibcugraph.k_truss_subgraph(resource_handle, G, k, False) diff --git a/python/pylibcugraph/pylibcugraph/leiden.pyx b/python/pylibcugraph/pylibcugraph/leiden.pyx index 87286234f16..04f8887551c 100644 --- a/python/pylibcugraph/pylibcugraph/leiden.pyx +++ b/python/pylibcugraph/pylibcugraph/leiden.pyx @@ -116,7 +116,7 @@ def leiden(ResourceHandle resource_handle, >>> graph_props = pylibcugraph.GraphProperties( ... is_symmetric=True, is_multigraph=False) >>> G = pylibcugraph.SGGraph( - ... resource_handle, graph_props, srcs, dsts, weights, + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, ... 
store_transposed=True, renumber=False, do_expensive_check=False) >>> (vertices, clusters, modularity) = pylibcugraph.Leiden( resource_handle, G, 100, 1., False) diff --git a/python/pylibcugraph/pylibcugraph/louvain.pyx b/python/pylibcugraph/pylibcugraph/louvain.pyx index eca569d7da1..58f4f10bc18 100644 --- a/python/pylibcugraph/pylibcugraph/louvain.pyx +++ b/python/pylibcugraph/pylibcugraph/louvain.pyx @@ -103,7 +103,7 @@ def louvain(ResourceHandle resource_handle, >>> graph_props = pylibcugraph.GraphProperties( ... is_symmetric=True, is_multigraph=False) >>> G = pylibcugraph.SGGraph( - ... resource_handle, graph_props, srcs, dsts, weights, + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, ... store_transposed=True, renumber=False, do_expensive_check=False) >>> (vertices, clusters, modularity) = pylibcugraph.louvain( resource_handle, G, 100, 1e-7, 1., False) diff --git a/python/pylibcugraph/pylibcugraph/node2vec.pyx b/python/pylibcugraph/pylibcugraph/node2vec.pyx index d0ab3f22b00..5d83fc46c3c 100644 --- a/python/pylibcugraph/pylibcugraph/node2vec.pyx +++ b/python/pylibcugraph/pylibcugraph/node2vec.pyx @@ -115,7 +115,7 @@ def node2vec(ResourceHandle resource_handle, >>> graph_props = pylibcugraph.GraphProperties( ... is_symmetric=False, is_multigraph=False) >>> G = pylibcugraph.SGGraph( - ... resource_handle, graph_props, srcs, dsts, weights, + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, ... store_transposed=False, renumber=False, do_expensive_check=False) >>> (paths, weights, sizes) = pylibcugraph.node2vec( ... resource_handle, G, seeds, 3, True, 1.0, 1.0) diff --git a/python/pylibcugraph/pylibcugraph/pagerank.pyx b/python/pylibcugraph/pylibcugraph/pagerank.pyx index f831d844338..9fec1328bbf 100644 --- a/python/pylibcugraph/pylibcugraph/pagerank.pyx +++ b/python/pylibcugraph/pylibcugraph/pagerank.pyx @@ -154,7 +154,7 @@ def pagerank(ResourceHandle resource_handle, >>> graph_props = pylibcugraph.GraphProperties( ... 
is_symmetric=False, is_multigraph=False) >>> G = pylibcugraph.SGGraph( - ... resource_handle, graph_props, srcs, dsts, weights, + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, ... store_transposed=True, renumber=False, do_expensive_check=False) >>> (vertices, pageranks) = pylibcugraph.pagerank( ... resource_handle, G, None, None, None, None, alpha=0.85, diff --git a/python/pylibcugraph/pylibcugraph/personalized_pagerank.pyx b/python/pylibcugraph/pylibcugraph/personalized_pagerank.pyx index 79ef80be549..85addffa694 100644 --- a/python/pylibcugraph/pylibcugraph/personalized_pagerank.pyx +++ b/python/pylibcugraph/pylibcugraph/personalized_pagerank.pyx @@ -161,7 +161,7 @@ def personalized_pagerank(ResourceHandle resource_handle, >>> graph_props = pylibcugraph.GraphProperties( ... is_symmetric=False, is_multigraph=False) >>> G = pylibcugraph.SGGraph( - ... resource_handle, graph_props, srcs, dsts, weights, + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, ... store_transposed=True, renumber=False, do_expensive_check=False) >>> (vertices, pageranks) = pylibcugraph.personalized_pagerank( ... resource_handle, G, None, None, None, None, alpha=0.85, diff --git a/python/pylibcugraph/pylibcugraph/spectral_modularity_maximization.pyx b/python/pylibcugraph/pylibcugraph/spectral_modularity_maximization.pyx index c74b1f0db41..fa01714744d 100644 --- a/python/pylibcugraph/pylibcugraph/spectral_modularity_maximization.pyx +++ b/python/pylibcugraph/pylibcugraph/spectral_modularity_maximization.pyx @@ -109,7 +109,7 @@ def spectral_modularity_maximization(ResourceHandle resource_handle, >>> graph_props = pylibcugraph.GraphProperties( ... is_symmetric=True, is_multigraph=False) >>> G = pylibcugraph.SGGraph( - ... resource_handle, graph_props, srcs, dsts, weights, + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, ... 
store_transposed=True, renumber=False, do_expensive_check=False) >>> (vertices, clusters) = pylibcugraph.spectral_modularity_maximization( ... resource_handle, G, num_clusters=5, num_eigen_vects=2, evs_tolerance=0.00001 diff --git a/python/pylibcugraph/pylibcugraph/sssp.pyx b/python/pylibcugraph/pylibcugraph/sssp.pyx index b2cd829cb2e..56765c4a1b8 100644 --- a/python/pylibcugraph/pylibcugraph/sssp.pyx +++ b/python/pylibcugraph/pylibcugraph/sssp.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -109,7 +109,7 @@ def sssp(ResourceHandle resource_handle, >>> graph_props = pylibcugraph.GraphProperties( ... is_symmetric=False, is_multigraph=False) >>> G = pylibcugraph.SGGraph( - ... resource_handle, graph_props, srcs, dsts, weights, + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, ... store_transposed=False, renumber=False, do_expensive_check=False) >>> (vertices, distances, predecessors) = pylibcugraph.sssp( ... resource_handle, G, source=1, cutoff=999, diff --git a/python/pylibcugraph/pylibcugraph/tests/conftest.py b/python/pylibcugraph/pylibcugraph/tests/conftest.py index a7fcbfdb42a..228147a6e9f 100644 --- a/python/pylibcugraph/pylibcugraph/tests/conftest.py +++ b/python/pylibcugraph/pylibcugraph/tests/conftest.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -135,11 +135,11 @@ def create_SGGraph(device_srcs, device_dsts, device_weights, transposed=False): graph_props = GraphProperties(is_symmetric=False, is_multigraph=False) g = SGGraph( - resource_handle, - graph_props, - device_srcs, - device_dsts, - device_weights, + resource_handle=resource_handle, + graph_properties=graph_props, + src_or_offset_array=device_srcs, + dst_or_index_array=device_dsts, + weight_array=device_weights, store_transposed=transposed, renumber=False, do_expensive_check=False, diff --git a/python/pylibcugraph/pylibcugraph/tests/test_eigenvector_centrality.py b/python/pylibcugraph/pylibcugraph/tests/test_eigenvector_centrality.py index b4ff29f31c4..551dd58bdd6 100644 --- a/python/pylibcugraph/pylibcugraph/tests/test_eigenvector_centrality.py +++ b/python/pylibcugraph/pylibcugraph/tests/test_eigenvector_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -56,11 +56,11 @@ def _generic_eigenvector_test( resource_handle = ResourceHandle() graph_props = GraphProperties(is_symmetric=False, is_multigraph=False) G = SGGraph( - resource_handle, - graph_props, - src_arr, - dst_arr, - wgt_arr, + resource_handle=resource_handle, + graph_properties=graph_props, + src_or_offset_array=src_arr, + dst_or_index_array=dst_arr, + weight_array=wgt_arr, store_transposed=False, renumber=False, do_expensive_check=True, diff --git a/python/pylibcugraph/pylibcugraph/tests/test_graph_sg.py b/python/pylibcugraph/pylibcugraph/tests/test_graph_sg.py index 4ebb6f1895e..b555a9a16bb 100644 --- a/python/pylibcugraph/pylibcugraph/tests/test_graph_sg.py +++ b/python/pylibcugraph/pylibcugraph/tests/test_graph_sg.py @@ -85,11 +85,11 @@ def test_sg_graph(graph_data): if is_valid: g = SGGraph( # noqa:F841 - resource_handle, - graph_props, - device_srcs, - device_dsts, - device_weights, + resource_handle=resource_handle, + graph_properties=graph_props, + src_or_offset_array=device_srcs, + dst_or_index_array=device_dsts, + weight_array=device_weights, store_transposed=False, renumber=False, do_expensive_check=False, @@ -100,11 +100,11 @@ def test_sg_graph(graph_data): else: with pytest.raises(ValueError): SGGraph( - resource_handle, - graph_props, - device_srcs, - device_dsts, - device_weights, + resource_handle=resource_handle, + graph_properties=graph_props, + src_or_offset_array=device_srcs, + dst_or_index_array=device_dsts, + weight_array=device_weights, store_transposed=False, renumber=False, do_expensive_check=False, @@ -130,7 +130,6 @@ def test_SGGraph_create_from_cudf(): SGGraph, ) - print("get edgelist...", end="", flush=True) edgelist = cudf.DataFrame( { "src": [0, 1, 2], @@ -139,10 +138,6 @@ def test_SGGraph_create_from_cudf(): } ) - print("edgelist = ", edgelist) - print("done", flush=True) - print("create Graph...", end="", flush=True) - graph_props = GraphProperties(is_multigraph=False, 
is_symmetric=False) plc_graph = SGGraph( diff --git a/python/pylibcugraph/pylibcugraph/tests/test_katz_centrality.py b/python/pylibcugraph/pylibcugraph/tests/test_katz_centrality.py index d12f90426fa..9550d3be481 100644 --- a/python/pylibcugraph/pylibcugraph/tests/test_katz_centrality.py +++ b/python/pylibcugraph/pylibcugraph/tests/test_katz_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -53,11 +53,11 @@ def _generic_katz_test( resource_handle = ResourceHandle() graph_props = GraphProperties(is_symmetric=False, is_multigraph=False) G = SGGraph( - resource_handle, - graph_props, - src_arr, - dst_arr, - wgt_arr, + resource_handle=resource_handle, + graph_properties=graph_props, + src_or_offset_array=src_arr, + dst_or_index_array=dst_arr, + weight_array=wgt_arr, store_transposed=False, renumber=False, do_expensive_check=True, diff --git a/python/pylibcugraph/pylibcugraph/tests/test_louvain.py b/python/pylibcugraph/pylibcugraph/tests/test_louvain.py index adea5e01f15..620c50f8412 100644 --- a/python/pylibcugraph/pylibcugraph/tests/test_louvain.py +++ b/python/pylibcugraph/pylibcugraph/tests/test_louvain.py @@ -81,11 +81,11 @@ def test_sg_louvain_cupy(): resolution = 1.0 sg = SGGraph( - resource_handle, - graph_props, - device_srcs, - device_dsts, - device_weights, + resource_handle=resource_handle, + graph_properties=graph_props, + src_or_offset_array=device_srcs, + dst_or_index_array=device_dsts, + weight_array=device_weights, store_transposed=False, renumber=True, do_expensive_check=False, @@ -135,11 +135,11 @@ def test_sg_louvain_cudf(): resolution = 1.0 sg = SGGraph( - resource_handle, - graph_props, - device_srcs, - device_dsts, - device_weights, + resource_handle=resource_handle, + graph_properties=graph_props, + 
src_or_offset_array=device_srcs, + dst_or_index_array=device_dsts, + weight_array=device_weights, store_transposed=False, renumber=True, do_expensive_check=False, diff --git a/python/pylibcugraph/pylibcugraph/tests/test_node2vec.py b/python/pylibcugraph/pylibcugraph/tests/test_node2vec.py index 0e400a5306c..fb303ce8047 100644 --- a/python/pylibcugraph/pylibcugraph/tests/test_node2vec.py +++ b/python/pylibcugraph/pylibcugraph/tests/test_node2vec.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -94,11 +94,11 @@ def _run_node2vec( resource_handle = ResourceHandle() graph_props = GraphProperties(is_symmetric=False, is_multigraph=False) G = SGGraph( - resource_handle, - graph_props, - src_arr, - dst_arr, - wgt_arr, + resource_handle=resource_handle, + graph_properties=graph_props, + src_or_offset_array=src_arr, + dst_or_index_array=dst_arr, + weight_array=wgt_arr, store_transposed=False, renumber=renumbered, do_expensive_check=True, @@ -795,11 +795,11 @@ def test_node2vec_renumber_cupy(graph_file, renumber): resource_handle = ResourceHandle() graph_props = GraphProperties(is_symmetric=False, is_multigraph=False) G = SGGraph( - resource_handle, - graph_props, - src_arr, - dst_arr, - wgt_arr, + resource_handle=resource_handle, + graph_properties=graph_props, + src_or_offset_array=src_arr, + dst_or_index_array=dst_arr, + weight_array=wgt_arr, store_transposed=False, renumber=renumber, do_expensive_check=True, diff --git a/python/pylibcugraph/pylibcugraph/tests/test_pagerank.py b/python/pylibcugraph/pylibcugraph/tests/test_pagerank.py index 56c4878324f..2a313a33f83 100644 --- a/python/pylibcugraph/pylibcugraph/tests/test_pagerank.py +++ b/python/pylibcugraph/pylibcugraph/tests/test_pagerank.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA 
CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/pylibcugraph/pylibcugraph/tests/test_sssp.py b/python/pylibcugraph/pylibcugraph/tests/test_sssp.py index ab46af4ff55..6ffbab76ae2 100644 --- a/python/pylibcugraph/pylibcugraph/tests/test_sssp.py +++ b/python/pylibcugraph/pylibcugraph/tests/test_sssp.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/pylibcugraph/pylibcugraph/tests/test_triangle_count.py b/python/pylibcugraph/pylibcugraph/tests/test_triangle_count.py index aa0d5cd35f5..1862f94ac26 100644 --- a/python/pylibcugraph/pylibcugraph/tests/test_triangle_count.py +++ b/python/pylibcugraph/pylibcugraph/tests/test_triangle_count.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -81,11 +81,11 @@ def test_sg_triangle_count_cupy(): start_list = None sg = SGGraph( - resource_handle, - graph_props, - device_srcs, - device_dsts, - device_weights, + resource_handle=resource_handle, + graph_properties=graph_props, + src_or_offset_array=device_srcs, + dst_or_index_array=device_dsts, + weight_array=device_weights, store_transposed=False, renumber=True, do_expensive_check=False, @@ -131,11 +131,11 @@ def test_sg_triangle_count_cudf(): start_list = None sg = SGGraph( - resource_handle, - graph_props, - device_srcs, - device_dsts, - device_weights, + resource_handle=resource_handle, + graph_properties=graph_props, + src_or_offset_array=device_srcs, + dst_or_index_array=device_dsts, + weight_array=device_weights, store_transposed=False, renumber=True, do_expensive_check=False, diff --git a/python/pylibcugraph/pylibcugraph/tests/test_uniform_neighbor_sample.py b/python/pylibcugraph/pylibcugraph/tests/test_uniform_neighbor_sample.py index ac04635edcf..ffa90731483 100644 --- a/python/pylibcugraph/pylibcugraph/tests/test_uniform_neighbor_sample.py +++ b/python/pylibcugraph/pylibcugraph/tests/test_uniform_neighbor_sample.py @@ -95,11 +95,11 @@ def test_neighborhood_sampling_cupy( num_edges = max(len(device_srcs), len(device_dsts)) sg = SGGraph( - resource_handle, - graph_props, - device_srcs, - device_dsts, - device_weights, + resource_handle=resource_handle, + graph_properties=graph_props, + src_or_offset_array=device_srcs, + dst_or_index_array=device_dsts, + weight_array=device_weights, store_transposed=store_transposed, renumber=renumber, do_expensive_check=False, @@ -153,11 +153,11 @@ def test_neighborhood_sampling_cudf( num_edges = max(len(device_srcs), len(device_dsts)) sg = SGGraph( - resource_handle, - graph_props, - device_srcs, - device_dsts, - device_weights, + resource_handle=resource_handle, + graph_properties=graph_props, + src_or_offset_array=device_srcs, + dst_or_index_array=device_dsts, + 
weight_array=device_weights, store_transposed=store_transposed, renumber=renumber, do_expensive_check=False, @@ -203,11 +203,11 @@ def test_neighborhood_sampling_large_sg_graph(gpubenchmark): fanout_vals = np.asarray([1, 2], dtype=np.int32) sg = SGGraph( - resource_handle, - graph_props, - device_srcs, - device_dsts, - device_weights, + resource_handle=resource_handle, + graph_properties=graph_props, + src_or_offset_array=device_srcs, + dst_or_index_array=device_dsts, + weight_array=device_weights, store_transposed=True, renumber=False, do_expensive_check=False, diff --git a/python/pylibcugraph/pylibcugraph/tests/test_utils.py b/python/pylibcugraph/pylibcugraph/tests/test_utils.py index 036a62b9c1e..64947c21b74 100644 --- a/python/pylibcugraph/pylibcugraph/tests/test_utils.py +++ b/python/pylibcugraph/pylibcugraph/tests/test_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at diff --git a/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx b/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx index 1570523beb8..677695f93a9 100644 --- a/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx +++ b/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx @@ -116,7 +116,7 @@ def uniform_random_walks(ResourceHandle resource_handle, cdef cugraph_type_erased_device_array_view_t* path_ptr = \ cugraph_random_walk_result_get_paths(result_ptr) - if input_graph.weights_view_ptr is NULL: + if input_graph.weights_view_ptr is NULL and input_graph.weights_view_ptr_ptr is NULL: cupy_weights = None else: weights_ptr = cugraph_random_walk_result_get_weights(result_ptr) diff --git a/python/pylibcugraph/pylibcugraph/weakly_connected_components.pyx b/python/pylibcugraph/pylibcugraph/weakly_connected_components.pyx index 7cc0d8ab4c1..240c374353d 100644 --- a/python/pylibcugraph/pylibcugraph/weakly_connected_components.pyx +++ b/python/pylibcugraph/pylibcugraph/weakly_connected_components.pyx @@ -129,7 +129,7 @@ def weakly_connected_components(ResourceHandle resource_handle, >>> graph_props = pylibcugraph.GraphProperties( ... is_symmetric=True, is_multigraph=False) >>> G = pylibcugraph.SGGraph( - ... resource_handle, graph_props, srcs, dsts, weights, + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, ... store_transposed=False, renumber=True, do_expensive_check=False) >>> (vertices, labels) = weakly_connected_components( ... resource_handle, G, None, None, None, None, False)