From 119816cfe04be421bf96baa8075d342f72cbbfc4 Mon Sep 17 00:00:00 2001 From: Alex Barghi <105237337+alexbarghi-nv@users.noreply.github.com> Date: Fri, 17 Nov 2023 09:16:31 -0500 Subject: [PATCH 1/2] [BUG] Fix Incorrect Edge Index, Directory Selection in cuGraph-PyG Loader (#3978) Fixes three major bugs: 1. Edge index is set to [dst, dst] instead of [dst, src] in some cases 2. The sample directory is always set to a new temporary directory rather than the path given 3. The version of `pylibcugraphops` in `meta.yaml` is wrong and causes the wrong packages to be resolved This PR also simplifies `ci/test_python.sh` by doing only a single conda install when creating the `test_cugraph_pyg` environment. Closes #3959 Authors: - Alex Barghi (https://github.com/alexbarghi-nv) - Naim (https://github.com/naimnv) Approvers: - Brad Rees (https://github.com/BradReesWork) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cugraph/pull/3978 --- ci/test_python.sh | 31 ++++++----- conda/recipes/cugraph-pyg/meta.yaml | 2 +- dependencies.yaml | 4 +- .../conda/cugraph_pyg_dev_cuda-118.yaml | 4 +- .../cugraph_pyg/data/cugraph_store.py | 12 +++-- .../cugraph_pyg/loader/cugraph_node_loader.py | 48 ++++++++++------- .../tests/mg/test_mg_cugraph_store.py | 2 +- .../cugraph_pyg/tests/test_cugraph_loader.py | 54 +++++++++++++++++-- .../cugraph_pyg/tests/test_cugraph_store.py | 2 +- 9 files changed, 112 insertions(+), 47 deletions(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index 1690ce2f15b..273d3c93482 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -197,27 +197,26 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then conda activate test_cugraph_pyg set -u - # Install pytorch + # Will automatically install built dependencies of cuGraph-PyG rapids-mamba-retry install \ - --force-reinstall \ - --channel pyg \ + --channel "${CPP_CHANNEL}" \ + --channel "${PYTHON_CHANNEL}" \ --channel pytorch \ --channel nvidia \ - 'pyg=2.3' \ - 'pytorch=2.0.0' \ - 'pytorch-cuda=11.8' + --channel pyg \ + --channel rapidsai-nightly \ + "cugraph-pyg" \ + "pytorch>=2.0,<2.1" \ + "pytorch-cuda=11.8" # Install pyg dependencies (which requires pip) - pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.0+cu118.html - - rapids-mamba-retry install \ - --channel "${CPP_CHANNEL}" \ - --channel "${PYTHON_CHANNEL}" \ - libcugraph \ - pylibcugraph \ - pylibcugraphops \ - cugraph \ - cugraph-pyg + pip install \ + pyg_lib \ + torch_scatter \ + torch_sparse \ + torch_cluster \ + torch_spline_conv \ + -f https://data.pyg.org/whl/torch-2.0.0+cu118.html rapids-print-env diff --git a/conda/recipes/cugraph-pyg/meta.yaml b/conda/recipes/cugraph-pyg/meta.yaml index 07caf07daab..a2a02a1d9f6 100644 --- a/conda/recipes/cugraph-pyg/meta.yaml +++ b/conda/recipes/cugraph-pyg/meta.yaml @@ -34,7 +34,7 @@ requirements: - cupy >=12.0.0 - cugraph ={{ version }} - pylibcugraphops ={{ minor_version }} - - pyg >=2.3,<2.4 + - pyg >=2.3,<2.5 tests: imports: diff --git a/dependencies.yaml b/dependencies.yaml index 13f100610cf..a89acd9288b 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -497,9 +497,9 @@ dependencies: - output_types: [conda] packages: - cugraph==23.12.* - - pytorch==2.0 + - pytorch>=2.0 - pytorch-cuda==11.8 - - pyg=2.3.1=*torch_2.0.0*cu118* + - pyg>=2.4.0 depends_on_rmm: common: diff --git a/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml b/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml index f98eab430ba..71d1c7e389c 100644 --- a/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml +++ b/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml @@ -13,13 +13,13 @@ dependencies: - cugraph==23.12.* - pandas - pre-commit -- pyg=2.3.1=*torch_2.0.0*cu118* +- pyg>=2.4.0 - pylibcugraphops==23.12.* - pytest - pytest-benchmark - pytest-cov - pytest-xdist - pytorch-cuda==11.8 -- pytorch==2.0 +- pytorch>=2.0 - scipy name: cugraph_pyg_dev_cuda-118 diff --git a/python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py b/python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py index d1b24543956..edeeface4c4 100644 --- a/python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py +++ b/python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py @@ -210,7 +210,10 @@ class EXPERIMENTAL__CuGraphStore: def __init__( self, F: cugraph.gnn.FeatureStore, - G: Union[Dict[str, Tuple[TensorType]], Dict[str, int]], + G: Union[ + Dict[Tuple[str, str, str], Tuple[TensorType]], + Dict[Tuple[str, str, str], int], + ], num_nodes_dict: Dict[str, int], *, multi_gpu: bool = False, @@ -744,7 +747,7 @@ def _subgraph(self, edge_types: List[tuple] = None) -> cugraph.MultiGraph: def _get_vertex_groups_from_sample( self, nodes_of_interest: TensorType, is_sorted: bool = False - ) -> dict: + ) -> Dict[str, torch.Tensor]: """ Given a tensor of nodes of interest, this method a single dictionary, noi_index. @@ -808,7 +811,10 @@ def _get_sample_from_vertex_groups( def _get_renumbered_edge_groups_from_sample( self, sampling_results: cudf.DataFrame, noi_index: dict - ) -> Tuple[dict, dict]: + ) -> Tuple[ + Dict[Tuple[str, str, str], torch.Tensor], + Tuple[Dict[Tuple[str, str, str], torch.Tensor]], + ]: """ Given a cudf (NOT dask_cudf) DataFrame of sampling results and a dictionary of non-renumbered vertex ids grouped by vertex type, this method diff --git a/python/cugraph-pyg/cugraph_pyg/loader/cugraph_node_loader.py b/python/cugraph-pyg/cugraph_pyg/loader/cugraph_node_loader.py index 8552e7412e0..ad8d22e255e 100644 --- a/python/cugraph-pyg/cugraph_pyg/loader/cugraph_node_loader.py +++ b/python/cugraph-pyg/cugraph_pyg/loader/cugraph_node_loader.py @@ -15,6 +15,7 @@ import os import re +import warnings import cupy import cudf @@ -159,23 +160,34 @@ def __init__( if batch_size is None or batch_size < 1: raise ValueError("Batch size must be >= 1") - self.__directory = tempfile.TemporaryDirectory(dir=directory) + self.__directory = ( + tempfile.TemporaryDirectory() if directory is None else directory + ) if isinstance(num_neighbors, dict): raise ValueError("num_neighbors dict is currently unsupported!") - renumber = ( - True - if ( - (len(self.__graph_store.node_types) == 1) - and (len(self.__graph_store.edge_types) == 1) + if "renumber" in kwargs: + warnings.warn( + "Setting renumbering manually could result in invalid output," + " please ensure you intended to do this." + ) + renumber = kwargs.pop("renumber") + else: + renumber = ( + True + if ( + (len(self.__graph_store.node_types) == 1) + and (len(self.__graph_store.edge_types) == 1) + ) + else False ) - else False - ) bulk_sampler = BulkSampler( batch_size, - self.__directory.name, + self.__directory + if isinstance(self.__directory, str) + else self.__directory.name, self.__graph_store._subgraph(edge_types), fanout_vals=num_neighbors, with_replacement=replace, @@ -219,7 +231,13 @@ def __init__( ) bulk_sampler.flush() - self.__input_files = iter(os.listdir(self.__directory.name)) + self.__input_files = iter( + os.listdir( + self.__directory + if isinstance(self.__directory, str) + else self.__directory.name + ) + ) def __next__(self): from time import perf_counter @@ -423,9 +441,6 @@ def __next__(self): sampler_output.edge, ) else: - if self.__graph_store.order == "CSR": - raise ValueError("CSR format incompatible with CSC output") - out = filter_cugraph_store_csc( self.__feature_store, self.__graph_store, @@ -437,11 +452,8 @@ def __next__(self): # Account for CSR format in cuGraph vs. CSC format in PyG if self.__coo and self.__graph_store.order == "CSC": - for node_type in out.edge_index_dict: - out[node_type].edge_index[0], out[node_type].edge_index[1] = ( - out[node_type].edge_index[1], - out[node_type].edge_index[0], - ) + for edge_type in out.edge_index_dict: + out[edge_type].edge_index = out[edge_type].edge_index.flip(dims=[0]) out.set_value_dict("num_sampled_nodes", sampler_output.num_sampled_nodes) out.set_value_dict("num_sampled_edges", sampler_output.num_sampled_edges) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/mg/test_mg_cugraph_store.py b/python/cugraph-pyg/cugraph_pyg/tests/mg/test_mg_cugraph_store.py index ed7f70034e2..13c9c90c7c2 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/mg/test_mg_cugraph_store.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/mg/test_mg_cugraph_store.py @@ -120,7 +120,7 @@ def test_get_edge_index(graph, edge_index_type, dask_client): G[et][0] = dask_cudf.from_cudf(cudf.Series(G[et][0]), npartitions=1) G[et][1] = dask_cudf.from_cudf(cudf.Series(G[et][1]), npartitions=1) - cugraph_store = CuGraphStore(F, G, N, multi_gpu=True) + cugraph_store = CuGraphStore(F, G, N, order="CSC", multi_gpu=True) for pyg_can_edge_type in G: src, dst = cugraph_store.get_edge_index( diff --git a/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_loader.py b/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_loader.py index 853836dc2a6..27b73bf7d35 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_loader.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_loader.py @@ -18,6 +18,7 @@ import cudf import cupy +import numpy as np from cugraph_pyg.loader import CuGraphNeighborLoader from cugraph_pyg.loader import BulkSampleLoader @@ -27,6 +28,8 @@ from cugraph.gnn import FeatureStore from cugraph.utilities.utils import import_optional, MissingModule +from typing import Dict, Tuple + torch = import_optional("torch") torch_geometric = import_optional("torch_geometric") trim_to_layer = import_optional("torch_geometric.utils.trim_to_layer") @@ -40,7 +43,11 @@ @pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_cugraph_loader_basic(karate_gnn): +def test_cugraph_loader_basic( + karate_gnn: Tuple[ + FeatureStore, Dict[Tuple[str, str, str], np.ndarray], Dict[str, int] + ] +): F, G, N = karate_gnn cugraph_store = CuGraphStore(F, G, N, order="CSR") loader = CuGraphNeighborLoader( @@ -66,7 +73,11 @@ def test_cugraph_loader_basic(karate_gnn): @pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") -def test_cugraph_loader_hetero(karate_gnn): +def test_cugraph_loader_hetero( + karate_gnn: Tuple[ + FeatureStore, Dict[Tuple[str, str, str], np.ndarray], Dict[str, int] + ] +): F, G, N = karate_gnn cugraph_store = CuGraphStore(F, G, N, order="CSR") loader = CuGraphNeighborLoader( @@ -342,7 +353,7 @@ def test_cugraph_loader_e2e_coo(): @pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") @pytest.mark.skipif(not HAS_TORCH_SPARSE, reason="torch-sparse not available") @pytest.mark.parametrize("framework", ["pyg", "cugraph-ops"]) -def test_cugraph_loader_e2e_csc(framework): +def test_cugraph_loader_e2e_csc(framework: str): m = [2, 9, 99, 82, 9, 3, 18, 1, 12] x = torch.randint(3000, (256, 256)).to(torch.float32) F = FeatureStore() @@ -442,3 +453,40 @@ def test_cugraph_loader_e2e_csc(framework): x = x.narrow(dim=0, start=0, length=s - num_sampled_nodes[1]) assert list(x.shape) == [1, 1] + + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.parametrize("directory", ["local", "temp"]) +def test_load_directory( + karate_gnn: Tuple[ + FeatureStore, Dict[Tuple[str, str, str], np.ndarray], Dict[str, int] + ], + directory: str, +): + if directory == "local": + local_dir = tempfile.TemporaryDirectory(dir=".") + + cugraph_store = CuGraphStore(*karate_gnn) + cugraph_loader = CuGraphNeighborLoader( + (cugraph_store, cugraph_store), + torch.arange(8, dtype=torch.int64), + 2, + num_neighbors=[8, 4, 2], + random_state=62, + replace=False, + directory=None if directory == "temp" else local_dir.name, + batches_per_partition=1, + ) + + it = iter(cugraph_loader) + next_batch = next(it) + assert next_batch is not None + + if directory == "local": + assert len(os.listdir(local_dir.name)) == 4 + + count = 1 + while next(it, None) is not None: + count += 1 + + assert count == 4 diff --git a/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_store.py b/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_store.py index da3043760d4..b39ebad8254 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_store.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_store.py @@ -113,7 +113,7 @@ def test_get_edge_index(graph, edge_index_type): G[et][0] = cudf.Series(G[et][0]) G[et][1] = cudf.Series(G[et][1]) - cugraph_store = CuGraphStore(F, G, N) + cugraph_store = CuGraphStore(F, G, N, order="CSC") for pyg_can_edge_type in G: src, dst = cugraph_store.get_edge_index( From 06f082b43aab84408d63a907327b9524f1cd0229 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 17 Nov 2023 10:36:35 -0600 Subject: [PATCH 2/2] Enable build concurrency for nightly and merge triggers. (#4009) --- .github/workflows/build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index ccfdb826812..0f490283795 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -22,7 +22,7 @@ on: default: nightly concurrency: - group: ${{ github.workflow }}-${{ github.ref }} + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} cancel-in-progress: true jobs: