From f519ac1b138c390fd8b9197ad91efe857cdb44f2 Mon Sep 17 00:00:00 2001 From: Ralph Liu <137829296+nv-rliu@users.noreply.github.com> Date: Wed, 19 Jun 2024 02:00:44 -0400 Subject: [PATCH] Forward merge branch-24.06 into branch-24.08 (#4489) Replaces https://github.com/rapidsai/cugraph/pull/4476 Authors: - Ralph Liu (https://github.com/nv-rliu) - Alex Barghi (https://github.com/alexbarghi-nv) - Tingyu Wang (https://github.com/tingyu66) - Bradley Dice (https://github.com/bdice) - James Lamb (https://github.com/jameslamb) Approvers: - Alex Barghi (https://github.com/alexbarghi-nv) - Chuck Hastings (https://github.com/ChuckHastings) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cugraph/pull/4489 --- ci/build_wheel.sh | 8 ++++- ci/build_wheel_cugraph.sh | 6 +++- ci/build_wheel_pylibcugraph.sh | 6 +++- ci/test_python.sh | 15 ++++------ ci/test_wheel_cugraph-pyg.sh | 1 - ci/test_wheel_nx-cugraph.sh | 7 ++++- .../all_cuda-118_arch-x86_64.yaml | 3 +- .../all_cuda-122_arch-x86_64.yaml | 3 +- conda/recipes/libcugraph/meta.yaml | 2 +- dependencies.yaml | 29 +++++++++++++++++-- python/cugraph-dgl/pyproject.toml | 2 ++ .../nn/tensor_product_conv.py | 10 ++++++- .../tests/test_tensor_product_conv.py | 9 +++++- python/cugraph-pyg/pyproject.toml | 2 ++ .../cugraph/gnn/data_loading/dist_sampler.py | 24 +++++++++++---- .../tests/sampling/test_bulk_sampler_io.py | 3 +- .../tests/sampling/test_dist_sampler.py | 4 +++ .../tests/sampling/test_dist_sampler_mg.py | 4 +++ 18 files changed, 110 insertions(+), 28 deletions(-) diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index c980ed320dc..da0f3617f3f 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -56,7 +56,13 @@ fi cd "${package_dir}" -python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check +python -m pip wheel \ + -w dist \ + -vvv \ + --no-deps \ + --disable-pip-version-check \ + --extra-index-url https://pypi.nvidia.com \ + . # pure-python packages should be marked as pure, and not have auditwheel run on them. if [[ ${package_name} == "nx-cugraph" ]] || \ diff --git a/ci/build_wheel_cugraph.sh b/ci/build_wheel_cugraph.sh index ffd6445f8d5..6545ee3eca0 100755 --- a/ci/build_wheel_cugraph.sh +++ b/ci/build_wheel_cugraph.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. set -euo pipefail @@ -12,6 +12,10 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" RAPIDS_PY_WHEEL_NAME=pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX} rapids-download-wheels-from-s3 ./local-pylibcugraph export PIP_FIND_LINKS=$(pwd)/local-pylibcugraph +PARALLEL_LEVEL=$(python -c \ + "from math import ceil; from multiprocessing import cpu_count; print(ceil(cpu_count()/4))") + export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_CUGRAPH_CPP=OFF;-DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/cugraph-ops/" +export SKBUILD_BUILD_TOOL_ARGS="-j${PARALLEL_LEVEL};-l${PARALLEL_LEVEL}" ./ci/build_wheel.sh cugraph python/cugraph diff --git a/ci/build_wheel_pylibcugraph.sh b/ci/build_wheel_pylibcugraph.sh index 7c5a7299421..ee33ab4a82d 100755 --- a/ci/build_wheel_pylibcugraph.sh +++ b/ci/build_wheel_pylibcugraph.sh @@ -1,8 +1,12 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. set -euo pipefail +PARALLEL_LEVEL=$(python -c \ + "from math import ceil; from multiprocessing import cpu_count; print(ceil(cpu_count()/4))") + export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_CUGRAPH_CPP=OFF;-DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/cugraph-ops/" +export SKBUILD_BUILD_TOOL_ARGS="-j${PARALLEL_LEVEL};-l${PARALLEL_LEVEL}" ./ci/build_wheel.sh pylibcugraph python/pylibcugraph diff --git a/ci/test_python.sh b/ci/test_python.sh index fdcf88d692a..39159284f45 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -44,6 +44,8 @@ rapids-mamba-retry install \ rapids-logger "Check GPU usage" nvidia-smi +export LD_PRELOAD="${CONDA_PREFIX}/lib/libgomp.so.1" + # RAPIDS_DATASET_ROOT_DIR is used by test scripts export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" pushd "${RAPIDS_DATASET_ROOT_DIR}" @@ -191,6 +193,8 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then conda activate test_cugraph_pyg set -u + rapids-print-env + # TODO re-enable logic once CUDA 12 is testable #if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then CONDA_CUDA_VERSION="11.8" @@ -204,18 +208,9 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ - --channel pytorch \ --channel pyg \ - --channel nvidia \ "cugraph-pyg" \ - "pytorch=2.1.0" \ - "pytorch-cuda=${CONDA_CUDA_VERSION}" - - # Install pyg dependencies (which requires pip) - - pip install \ - ogb \ - tensordict + "ogb" pip install \ pyg_lib \ diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 1004063cc38..c55ae033344 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -42,7 +42,6 @@ rapids-retry python -m pip install \ pyg_lib \ torch_scatter \ torch_sparse \ - tensordict \ -f ${PYG_URL} rapids-logger "pytest cugraph-pyg (single GPU)" diff --git a/ci/test_wheel_nx-cugraph.sh b/ci/test_wheel_nx-cugraph.sh index 53d40960fc3..b5adfbcb9d3 100755 --- a/ci/test_wheel_nx-cugraph.sh +++ b/ci/test_wheel_nx-cugraph.sh @@ -1,6 +1,11 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. set -eoxu pipefail +# Download wheels built during this job. +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +RAPIDS_PY_WHEEL_NAME="pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-deps +python -m pip install ./local-deps/*.whl + ./ci/test_wheel.sh nx-cugraph python/nx-cugraph diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index a834620b89c..4a235eac7c4 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -42,7 +42,7 @@ dependencies: - numpy>=1.23,<2.0a0 - numpydoc - nvcc_linux-64=11.8 -- openmpi +- openmpi<5.0.3 - packaging>=21 - pandas - pre-commit @@ -56,6 +56,7 @@ dependencies: - pytest-mpl - pytest-xdist - python-louvain +- pytorch>=2.0,<2.2.0a0 - raft-dask==24.8.* - rapids-dask-dependency==24.8.* - recommonmark diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml index a76034d71c3..8275634e55b 100644 --- a/conda/environments/all_cuda-122_arch-x86_64.yaml +++ b/conda/environments/all_cuda-122_arch-x86_64.yaml @@ -47,7 +47,7 @@ dependencies: - numba>=0.57 - numpy>=1.23,<2.0a0 - numpydoc -- openmpi +- openmpi<5.0.3 - packaging>=21 - pandas - pre-commit @@ -61,6 +61,7 @@ dependencies: - pytest-mpl - pytest-xdist - python-louvain +- pytorch>=2.0,<2.2.0a0 - raft-dask==24.8.* - rapids-dask-dependency==24.8.* - recommonmark diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index a4bb361aa6b..cbd97604cff 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -42,7 +42,7 @@ requirements: - {{ compiler('cxx') }} - cmake {{ cmake_version }} - ninja - - openmpi # Required for building cpp-mgtests (multi-GPU tests) + - openmpi<5.0.3 # Required for building cpp-mgtests (multi-GPU tests) - {{ stdlib("c") }} host: {% if cuda_major == "11" %} diff --git a/dependencies.yaml b/dependencies.yaml index 65772a6413a..91593bf9168 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -22,6 +22,7 @@ files: - depends_on_pylibcugraphops - depends_on_pylibwholegraph - depends_on_cupy + - depends_on_pytorch - python_run_cugraph - python_run_nx_cugraph - python_run_cugraph_dgl @@ -62,6 +63,7 @@ files: - cuda_version - depends_on_cudf - depends_on_pylibwholegraph + - depends_on_pytorch - py_version - test_python_common - test_python_cugraph @@ -177,6 +179,7 @@ files: includes: - test_python_common - depends_on_pylibwholegraph + - depends_on_pytorch py_build_cugraph_pyg: output: pyproject pyproject_dir: python/cugraph-pyg @@ -201,6 +204,7 @@ files: includes: - test_python_common - depends_on_pylibwholegraph + - depends_on_pytorch py_build_cugraph_equivariant: output: pyproject pyproject_dir: python/cugraph-equivariant @@ -362,7 +366,7 @@ dependencies: - libraft-headers==24.8.* - libraft==24.8.* - librmm==24.8.* - - openmpi # Required for building cpp-mgtests (multi-GPU tests) + - openmpi<5.0.3 # Required for building cpp-mgtests (multi-GPU tests) specific: - output_types: [conda] matrices: @@ -568,9 +572,30 @@ dependencies: - cugraph==24.8.* - pytorch>=2.0 - pytorch-cuda==11.8 - - tensordict>=0.1.2 + - &tensordict tensordict>=0.1.2 - pyg>=2.5,<2.6 + depends_on_pytorch: + common: + - output_types: [conda] + packages: + - &pytorch_conda pytorch>=2.0,<2.2.0a0 + + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: {cuda: "12.*"} + packages: + - &pytorch_pip torch>=2.0,<2.2.0a0 + - *tensordict + - --extra-index-url=https://download.pytorch.org/whl/cu121 + - matrix: {cuda: "11.*"} + packages: + - *pytorch_pip + - *tensordict + - --extra-index-url=https://download.pytorch.org/whl/cu118 + - {matrix: null, packages: [*pytorch_pip, *tensordict]} + depends_on_pylibwholegraph: common: - output_types: conda diff --git a/python/cugraph-dgl/pyproject.toml b/python/cugraph-dgl/pyproject.toml index b0ee00682a0..8f81d762a21 100644 --- a/python/cugraph-dgl/pyproject.toml +++ b/python/cugraph-dgl/pyproject.toml @@ -38,6 +38,8 @@ test = [ "pytest-cov", "pytest-xdist", "scipy", + "tensordict>=0.1.2", + "torch>=2.0,<2.2.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] diff --git a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py index af1d0efa76c..5a67fbe1502 100644 --- a/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py +++ b/python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py @@ -20,7 +20,15 @@ from cugraph_equivariant.utils import scatter_reduce -from pylibcugraphops.pytorch.operators import FusedFullyConnectedTensorProduct +try: + from pylibcugraphops.pytorch.operators import FusedFullyConnectedTensorProduct +except ImportError as exc: + raise RuntimeError( + "FullyConnectedTensorProductConv is no longer supported in " + "cugraph-equivariant starting from version 24.08. It will be migrated " + "to the new `cuequivariance` package. Please use 24.06 release for the " + "legacy interface." + ) from exc class FullyConnectedTensorProductConv(nn.Module): diff --git a/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py b/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py index a2a13b32cd2..7fbab1dc934 100644 --- a/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py +++ b/python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py @@ -16,7 +16,14 @@ import torch from torch import nn from e3nn import o3 -from cugraph_equivariant.nn import FullyConnectedTensorProductConv + +try: + from cugraph_equivariant.nn import FullyConnectedTensorProductConv +except RuntimeError: + pytest.skip( + "Migrated to cuequivariance package starting from 24.08.", + allow_module_level=True, + ) device = torch.device("cuda:0") diff --git a/python/cugraph-pyg/pyproject.toml b/python/cugraph-pyg/pyproject.toml index 7c4a27999e4..2bf744c817d 100644 --- a/python/cugraph-pyg/pyproject.toml +++ b/python/cugraph-pyg/pyproject.toml @@ -46,6 +46,8 @@ test = [ "pytest-cov", "pytest-xdist", "scipy", + "tensordict>=0.1.2", + "torch>=2.0,<2.2.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [tool.setuptools] diff --git a/python/cugraph/cugraph/gnn/data_loading/dist_sampler.py b/python/cugraph/cugraph/gnn/data_loading/dist_sampler.py index 52638230b9b..a5a84362a07 100644 --- a/python/cugraph/cugraph/gnn/data_loading/dist_sampler.py +++ b/python/cugraph/cugraph/gnn/data_loading/dist_sampler.py @@ -24,14 +24,12 @@ from typing import Union, List, Dict, Tuple, Iterator, Optional -from cugraph.utilities import import_optional +from cugraph.utilities.utils import import_optional, MissingModule from cugraph.gnn.comms import cugraph_comms_get_raft_handle from cugraph.gnn.data_loading.bulk_sampler_io import create_df_from_disjoint_arrays -# PyTorch is NOT optional but this is required for container builds. -torch = import_optional("torch") - +torch = MissingModule("torch") TensorType = Union["torch.Tensor", cupy.ndarray, cudf.Series] @@ -44,6 +42,8 @@ def __init__( rank: Optional[int] = None, filelist=None, ): + torch = import_optional("torch") + self.__format = format self.__directory = directory @@ -77,6 +77,8 @@ def __iter__(self): return self def __next__(self): + torch = import_optional("torch") + if len(self.__files) > 0: f = self.__files.pop() fname = f[0] @@ -404,6 +406,7 @@ def get_reader(self) -> Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]: """ Returns an iterator over sampled data. """ + torch = import_optional("torch") rank = torch.distributed.get_rank() if self.is_multi_gpu else None return self.__writer.get_reader(rank) @@ -461,6 +464,8 @@ def get_label_list_and_output_rank( label_to_output_comm_rank: TensorType The global mapping of labels to ranks. """ + torch = import_optional("torch") + world_size = torch.distributed.get_world_size() if assume_equal_input_size: @@ -528,6 +533,8 @@ def get_start_batch_offset( and whether the input sizes on each rank are equal (bool). """ + torch = import_optional("torch") + input_size_is_equal = True if self.is_multi_gpu: rank = torch.distributed.get_rank() @@ -581,6 +588,8 @@ def sample_from_nodes( random_state: int The random seed to use for sampling. """ + torch = import_optional("torch") + nodes = torch.as_tensor(nodes, device="cuda") batches_per_call = self._local_seeds_per_call // batch_size @@ -700,6 +709,8 @@ def __init__( ) def __calc_local_seeds_per_call(self, local_seeds_per_call: Optional[int] = None): + torch = import_optional("torch") + if local_seeds_per_call is None: if len([x for x in self.__fanout if x <= 0]) > 0: return UniformNeighborSampler.UNKNOWN_VERTICES_DEFAULT @@ -721,6 +732,7 @@ def sample_batches( random_state: int = 0, assume_equal_input_size: bool = False, ) -> Dict[str, TensorType]: + torch = import_optional("torch") if self.is_multi_gpu: rank = torch.distributed.get_rank() @@ -800,7 +812,9 @@ def sample_batches( compression=self.__compression, compress_per_hop=self.__compress_per_hop, retain_seeds=self._retain_original_seeds, - label_offsets=cupy.asarray(label_offsets), + label_offsets=None + if label_offsets is None + else cupy.asarray(label_offsets), return_dict=True, ) diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io.py index 5eafe89ea83..ad5b70015de 100644 --- a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io.py +++ b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -169,6 +169,7 @@ def test_bulk_sampler_io_empty_batch(scratch_dir): @pytest.mark.sg +@pytest.mark.skip(reason="broken") def test_bulk_sampler_io_mock_csr(scratch_dir): major_offsets_array = cudf.Series([0, 5, 10, 15]) minors_array = cudf.Series([1, 2, 3, 4, 8, 9, 1, 3, 4, 5, 3, 0, 4, 9, 1]) diff --git a/python/cugraph/cugraph/tests/sampling/test_dist_sampler.py b/python/cugraph/cugraph/tests/sampling/test_dist_sampler.py index 02676774a02..88589429e85 100644 --- a/python/cugraph/cugraph/tests/sampling/test_dist_sampler.py +++ b/python/cugraph/cugraph/tests/sampling/test_dist_sampler.py @@ -31,6 +31,10 @@ torch = import_optional("torch") +if not isinstance(torch, MissingModule): + from rmm.allocators.torch import rmm_torch_allocator + + torch.cuda.change_current_allocator(rmm_torch_allocator) @pytest.fixture diff --git a/python/cugraph/cugraph/tests/sampling/test_dist_sampler_mg.py b/python/cugraph/cugraph/tests/sampling/test_dist_sampler_mg.py index bf65e46c516..324811e3368 100644 --- a/python/cugraph/cugraph/tests/sampling/test_dist_sampler_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_dist_sampler_mg.py @@ -36,6 +36,10 @@ ) torch = import_optional("torch") +if __name__ == "__main__" and not isinstance(torch, MissingModule): + from rmm.allocators.torch import rmm_torch_allocator + + torch.cuda.change_current_allocator(rmm_torch_allocator) def karate_mg_graph(rank, world_size):