From 58075dd39b4ef879d2e2ec1871be1e4e72961f4f Mon Sep 17 00:00:00 2001 From: Alex Barghi <105237337+alexbarghi-nv@users.noreply.github.com> Date: Fri, 6 Dec 2024 13:26:45 -0500 Subject: [PATCH 1/4] [BUG] Set Max PyTorch Version, Skip 11.4 Tests Using WholeGraph (#4808) Set Max PyTorch Version, Skip 11.4 Tests Using WholeGraph Authors: - Alex Barghi (https://github.com/alexbarghi-nv) Approvers: - Bradley Dice (https://github.com/bdice) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4808 --- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- dependencies.yaml | 2 +- .../data_store/test_gnn_feat_storage_wholegraph.py | 13 +++++++++++++ 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index e7fa6d4ee42..c045db32c65 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -53,7 +53,7 @@ dependencies: - pytest-cov - pytest-xdist - python-louvain -- pytorch>=2.3 +- pytorch>=2.3,<2.5a0 - raft-dask==24.12.*,>=0.0.0a0 - rapids-build-backend>=0.3.1,<0.4.0.dev0 - rapids-dask-dependency==24.12.*,>=0.0.0a0 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 7b7ac92b59b..333d9dc8c2f 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -58,7 +58,7 @@ dependencies: - pytest-cov - pytest-xdist - python-louvain -- pytorch>=2.3 +- pytorch>=2.3,<2.5a0 - raft-dask==24.12.*,>=0.0.0a0 - rapids-build-backend>=0.3.1,<0.4.0.dev0 - rapids-dask-dependency==24.12.*,>=0.0.0a0 diff --git a/dependencies.yaml b/dependencies.yaml index 7b25ac05d62..d081655f8a9 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -507,7 +507,7 @@ dependencies: common: - output_types: [conda] packages: - - &pytorch_conda pytorch>=2.3 + - &pytorch_conda pytorch>=2.3,<2.5a0 - torchdata - pydantic - ogb diff --git a/python/cugraph/cugraph/tests/data_store/test_gnn_feat_storage_wholegraph.py b/python/cugraph/cugraph/tests/data_store/test_gnn_feat_storage_wholegraph.py index 30336490312..f760ef3e1ba 100644 --- a/python/cugraph/cugraph/tests/data_store/test_gnn_feat_storage_wholegraph.py +++ b/python/cugraph/cugraph/tests/data_store/test_gnn_feat_storage_wholegraph.py @@ -15,6 +15,8 @@ import numpy as np import os +import numba.cuda + from cugraph.gnn import FeatureStore from cugraph.utilities.utils import import_optional, MissingModule @@ -25,6 +27,11 @@ wgth = import_optional("pylibwholegraph.torch") +def get_cudart_version(): + major, minor = numba.cuda.runtime.get_version() + return major * 1000 + minor * 10 + + def runtest(rank: int, world_size: int): torch.cuda.set_device(rank) @@ -66,6 +73,9 @@ def runtest(rank: int, world_size: int): @pytest.mark.skipif( isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available" ) +@pytest.mark.skipif( + get_cudart_version() < 11080, reason="not compatible with CUDA < 11.8" +) def test_feature_storage_wholegraph_backend(): world_size = torch.cuda.device_count() print("gpu count:", world_size) @@ -81,6 +91,9 @@ def test_feature_storage_wholegraph_backend(): @pytest.mark.skipif( isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available" ) +@pytest.mark.skipif( + get_cudart_version() < 11080, reason="not compatible with CUDA < 11.8" +) def test_feature_storage_wholegraph_backend_mg(): world_size = torch.cuda.device_count() print("gpu count:", world_size) From 2d32c86fefa88de7c7f29ac9b04a204f4ef027c8 Mon Sep 17 00:00:00 2001 From: Ralph Liu <137829296+nv-rliu@users.noreply.github.com> Date: Wed, 11 Dec 2024 23:40:23 +0900 Subject: [PATCH 2/4] Fix SSL Error (#4825) Addresses errors in `wheel-tests` seen [here](https://github.com/rapidsai/cugraph/issues/4818) Installs SSL certificates with `certifi` as outlined in this issue [here](https://github.com/rapidsai/build-infra/issues/56) --- conda/environments/all_cuda-118_arch-x86_64.yaml | 1 + conda/environments/all_cuda-125_arch-x86_64.yaml | 1 + dependencies.yaml | 2 ++ python/cugraph-service/server/pyproject.toml | 1 + python/cugraph/cugraph/datasets/dataset.py | 15 ++++++++------- python/cugraph/cugraph/testing/resultset.py | 3 --- python/cugraph/cugraph/tests/conftest.py | 13 ++++++++++++- python/cugraph/pyproject.toml | 1 + 8 files changed, 26 insertions(+), 11 deletions(-) mode change 100644 => 100755 dependencies.yaml diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index c045db32c65..23d09269a90 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -10,6 +10,7 @@ dependencies: - aiohttp - breathe - c-compiler +- certifi - cmake>=3.26.4,!=3.30.0 - cuda-nvtx - cuda-version=11.8 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 333d9dc8c2f..afd4dec41c7 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -10,6 +10,7 @@ dependencies: - aiohttp - breathe - c-compiler +- certifi - cmake>=3.26.4,!=3.30.0 - cuda-cudart-dev - cuda-nvcc diff --git a/dependencies.yaml b/dependencies.yaml old mode 100644 new mode 100755 index d081655f8a9..142fe0c987e --- a/dependencies.yaml +++ b/dependencies.yaml @@ -470,6 +470,7 @@ dependencies: common: - output_types: [conda, requirements] packages: + - certifi - ipython - notebook>=0.5.0 - output_types: [conda] @@ -489,6 +490,7 @@ dependencies: common: - output_types: [conda, pyproject] packages: + - certifi - networkx>=2.5.1 - *numpy - python-louvain diff --git a/python/cugraph-service/server/pyproject.toml b/python/cugraph-service/server/pyproject.toml index f388fd4c126..caafd1fe507 100644 --- a/python/cugraph-service/server/pyproject.toml +++ b/python/cugraph-service/server/pyproject.toml @@ -46,6 +46,7 @@ cugraph-service-server = "cugraph_service_server.__main__:main" [project.optional-dependencies] test = [ + "certifi", "networkx>=2.5.1", "numpy>=1.23,<3.0a0", "pandas", diff --git a/python/cugraph/cugraph/datasets/dataset.py b/python/cugraph/cugraph/datasets/dataset.py index 63389cbc16a..763ae8033f5 100644 --- a/python/cugraph/cugraph/datasets/dataset.py +++ b/python/cugraph/cugraph/datasets/dataset.py @@ -11,14 +11,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cudf -import dask_cudf -import yaml import os import pandas as pd -import cugraph.dask as dcg +import yaml from pathlib import Path -import urllib.request +from urllib.request import urlretrieve + +import cudf +import cugraph.dask as dcg +import dask_cudf from cugraph.structure.graph_classes import Graph @@ -142,7 +143,7 @@ def __download_csv(self, url): filename = self.metadata["name"] + self.metadata["file_type"] if self._dl_path.path.is_dir(): self._path = self._dl_path.path / filename - urllib.request.urlretrieve(url, str(self._path)) + urlretrieve(url, str(self._path)) else: raise RuntimeError( @@ -458,7 +459,7 @@ def download_all(force=False): filename = meta["name"] + meta["file_type"] save_to = default_download_dir.path / filename if not save_to.is_file() or force: - urllib.request.urlretrieve(meta["url"], str(save_to)) + urlretrieve(meta["url"], str(save_to)) def set_download_dir(path): diff --git a/python/cugraph/cugraph/testing/resultset.py b/python/cugraph/cugraph/testing/resultset.py index f557ad13089..f7d2521752c 100644 --- a/python/cugraph/cugraph/testing/resultset.py +++ b/python/cugraph/cugraph/testing/resultset.py @@ -13,7 +13,6 @@ import warnings import tarfile - import urllib.request import cudf @@ -22,8 +21,6 @@ default_download_dir, ) -# results_dir_path = utils.RAPIDS_DATASET_ROOT_DIR_PATH / "tests" / "resultsets" - class Resultset: """ diff --git a/python/cugraph/cugraph/tests/conftest.py b/python/cugraph/cugraph/tests/conftest.py index d31c2968afe..101a4e6a192 100644 --- a/python/cugraph/cugraph/tests/conftest.py +++ b/python/cugraph/cugraph/tests/conftest.py @@ -23,9 +23,20 @@ # Avoid timeout during shutdown from dask_cuda.utils_test import IncreasedCloseTimeoutNanny -# module-wide fixtures +import certifi +from ssl import create_default_context +from urllib.request import build_opener, HTTPSHandler, install_opener + + +# Install SSL certificates +def pytest_sessionstart(session): + ssl_context = create_default_context(cafile=certifi.where()) + https_handler = HTTPSHandler(context=ssl_context) + install_opener(build_opener(https_handler)) +# module-wide fixtures + # Spoof the gpubenchmark fixture if it's not available so that asvdb and # rapids-pytest-benchmark do not need to be installed to run tests. if "gpubenchmark" not in globals(): diff --git a/python/cugraph/pyproject.toml b/python/cugraph/pyproject.toml index d3960ab5d32..bb5b5256603 100644 --- a/python/cugraph/pyproject.toml +++ b/python/cugraph/pyproject.toml @@ -46,6 +46,7 @@ classifiers = [ [project.optional-dependencies] test = [ + "certifi", "networkx>=2.5.1", "numpy>=1.23,<3.0a0", "pandas", From 1f9e3eb5afc302b3462f7bcd159240ac5a660e7f Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Wed, 11 Dec 2024 13:10:58 -0500 Subject: [PATCH 3/4] Update Changelog [skip ci] --- CHANGELOG.md | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d934273d0a7..c6320c3527f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,79 @@ +# cugraph 24.12.00 (11 Dec 2024) + +## 🚨 Breaking Changes + +- Fix SSL Error ([#4825](https://github.com/rapidsai/cugraph/pull/4825)) [@nv-rliu](https://github.com/nv-rliu) +- [BUG] Set Max PyTorch Version, Skip 11.4 Tests Using WholeGraph ([#4808](https://github.com/rapidsai/cugraph/pull/4808)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- Remove GNN Packages ([#4765](https://github.com/rapidsai/cugraph/pull/4765)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- remove cugraph-dgl and cugraph-pyg building/testing/publishing ([#4752](https://github.com/rapidsai/cugraph/pull/4752)) [@jameslamb](https://github.com/jameslamb) +- Remove `nx-cugraph` build/test/publish from cugraph ([#4748](https://github.com/rapidsai/cugraph/pull/4748)) [@nv-rliu](https://github.com/nv-rliu) +- Remove CMake/C++ references to cugraph-ops ([#4744](https://github.com/rapidsai/cugraph/pull/4744)) [@ChuckHastings](https://github.com/ChuckHastings) +- Delete the deprecated data_type_id_t enum ([#4737](https://github.com/rapidsai/cugraph/pull/4737)) [@ChuckHastings](https://github.com/ChuckHastings) +- Don't compile int32_t/int64_t vertex_t/edge_t combinations ([#4720](https://github.com/rapidsai/cugraph/pull/4720)) [@ChuckHastings](https://github.com/ChuckHastings) +- Remove deprecated C API functions for graph creation/graph free ([#4718](https://github.com/rapidsai/cugraph/pull/4718)) [@ChuckHastings](https://github.com/ChuckHastings) + +## 🐛 Bug Fixes + +- Fix SSL Error ([#4825](https://github.com/rapidsai/cugraph/pull/4825)) [@nv-rliu](https://github.com/nv-rliu) +- [BUG] Set Max PyTorch Version, Skip 11.4 Tests Using WholeGraph ([#4808](https://github.com/rapidsai/cugraph/pull/4808)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- Fix CCCL 2.7.0-rc2 compile issue by removing reference from values. ([#4799](https://github.com/rapidsai/cugraph/pull/4799)) [@bdice](https://github.com/bdice) +- Re-enable balanced edge cut test ([#4781](https://github.com/rapidsai/cugraph/pull/4781)) [@ChuckHastings](https://github.com/ChuckHastings) +- Fix debug build failure ([#4774](https://github.com/rapidsai/cugraph/pull/4774)) [@seunghwak](https://github.com/seunghwak) +- Add sphinx-lint pre-commit and some docs fixes ([#4771](https://github.com/rapidsai/cugraph/pull/4771)) [@eriknw](https://github.com/eriknw) +- Fix improper initialization of degree_type ([#4755](https://github.com/rapidsai/cugraph/pull/4755)) [@ChuckHastings](https://github.com/ChuckHastings) +- Put a ceiling on cuda-python ([#4747](https://github.com/rapidsai/cugraph/pull/4747)) [@bdice](https://github.com/bdice) +- Fix MG similarity issues ([#4741](https://github.com/rapidsai/cugraph/pull/4741)) [@ChuckHastings](https://github.com/ChuckHastings) +- Address Leiden clustering generating too many clusters ([#4730](https://github.com/rapidsai/cugraph/pull/4730)) [@ChuckHastings](https://github.com/ChuckHastings) +- Add support for storing results for all k-values ([#4728](https://github.com/rapidsai/cugraph/pull/4728)) [@nv-rliu](https://github.com/nv-rliu) +- Remove `--collect-only` Option Left by Accident ([#4727](https://github.com/rapidsai/cugraph/pull/4727)) [@nv-rliu](https://github.com/nv-rliu) +- Update nx-cugraph to NetworkX 3.4 ([#4717](https://github.com/rapidsai/cugraph/pull/4717)) [@eriknw](https://github.com/eriknw) +- Symmetrize edgelist when creating a CSR graph ([#4716](https://github.com/rapidsai/cugraph/pull/4716)) [@jnke2016](https://github.com/jnke2016) + +## 📖 Documentation + +- nx-cugraph: add `bipartite_betweenness_centrality` to supported algorithms ([#4778](https://github.com/rapidsai/cugraph/pull/4778)) [@eriknw](https://github.com/eriknw) +- Notebook to run Centrality against PatentsView data for Medium Blog ([#4769](https://github.com/rapidsai/cugraph/pull/4769)) [@acostadon](https://github.com/acostadon) +- Drop support for NetworkX 3.0 and 3.1 for nx-cugraph ([#4766](https://github.com/rapidsai/cugraph/pull/4766)) [@eriknw](https://github.com/eriknw) +- Update nx-cugraph Docs URLs ([#4761](https://github.com/rapidsai/cugraph/pull/4761)) [@nv-rliu](https://github.com/nv-rliu) +- Update `nx-cugraph` README with New Env Variable ([#4705](https://github.com/rapidsai/cugraph/pull/4705)) [@nv-rliu](https://github.com/nv-rliu) + +## 🚀 New Features + +- [FEA] Support Edge ID Lookup in PyLibcuGraph ([#4687](https://github.com/rapidsai/cugraph/pull/4687)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) + +## 🛠️ Improvements + +- Increase max_iterations in MG HITS TEST ([#4783](https://github.com/rapidsai/cugraph/pull/4783)) [@seunghwak](https://github.com/seunghwak) +- Updates READMEs, updates `core_number` to properly ignore `degree_type`, minor cleanup ([#4776](https://github.com/rapidsai/cugraph/pull/4776)) [@rlratzel](https://github.com/rlratzel) +- Remove edge renumber map from the homogeneous sampling API ([#4775](https://github.com/rapidsai/cugraph/pull/4775)) [@jnke2016](https://github.com/jnke2016) +- Remove GNN Packages ([#4765](https://github.com/rapidsai/cugraph/pull/4765)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- Remove cugraph-equivariance ([#4762](https://github.com/rapidsai/cugraph/pull/4762)) [@BradReesWork](https://github.com/BradReesWork) +- deprecate NetworkX Graph as input ([#4759](https://github.com/rapidsai/cugraph/pull/4759)) [@BradReesWork](https://github.com/BradReesWork) +- Remove `nx-cugraph` from this repo ([#4756](https://github.com/rapidsai/cugraph/pull/4756)) [@eriknw](https://github.com/eriknw) +- enforce wheel size limits, README formatting in CI ([#4754](https://github.com/rapidsai/cugraph/pull/4754)) [@jameslamb](https://github.com/jameslamb) +- remove cugraph-dgl and cugraph-pyg building/testing/publishing ([#4752](https://github.com/rapidsai/cugraph/pull/4752)) [@jameslamb](https://github.com/jameslamb) +- Primitives & BFS performance improvements ([#4751](https://github.com/rapidsai/cugraph/pull/4751)) [@seunghwak](https://github.com/seunghwak) +- extract the edgelist from the graph ([#4750](https://github.com/rapidsai/cugraph/pull/4750)) [@jnke2016](https://github.com/jnke2016) +- Remove `nx-cugraph` build/test/publish from cugraph ([#4748](https://github.com/rapidsai/cugraph/pull/4748)) [@nv-rliu](https://github.com/nv-rliu) +- Remove CMake/C++ references to cugraph-ops ([#4744](https://github.com/rapidsai/cugraph/pull/4744)) [@ChuckHastings](https://github.com/ChuckHastings) +- add telemetry ([#4740](https://github.com/rapidsai/cugraph/pull/4740)) [@msarahan](https://github.com/msarahan) +- Delete the deprecated data_type_id_t enum ([#4737](https://github.com/rapidsai/cugraph/pull/4737)) [@ChuckHastings](https://github.com/ChuckHastings) +- Updates README with new dataset, removes mention of script no longer used ([#4736](https://github.com/rapidsai/cugraph/pull/4736)) [@rlratzel](https://github.com/rlratzel) +- devcontainer: replace `VAULT_HOST` with `AWS_ROLE_ARN` ([#4732](https://github.com/rapidsai/cugraph/pull/4732)) [@jjacobelli](https://github.com/jjacobelli) +- Adds new dataset for benchmarking in the 100k node 1M edge range, adds additional k-values for BC benchmarks ([#4726](https://github.com/rapidsai/cugraph/pull/4726)) [@rlratzel](https://github.com/rlratzel) +- re-run all CI when files in .github/workflows change ([#4723](https://github.com/rapidsai/cugraph/pull/4723)) [@jameslamb](https://github.com/jameslamb) +- Don't compile int32_t/int64_t vertex_t/edge_t combinations ([#4720](https://github.com/rapidsai/cugraph/pull/4720)) [@ChuckHastings](https://github.com/ChuckHastings) +- print sccache stats in builds ([#4719](https://github.com/rapidsai/cugraph/pull/4719)) [@jameslamb](https://github.com/jameslamb) +- Remove deprecated C API functions for graph creation/graph free ([#4718](https://github.com/rapidsai/cugraph/pull/4718)) [@ChuckHastings](https://github.com/ChuckHastings) +- Clean up the use of cuco hash functions ([#4707](https://github.com/rapidsai/cugraph/pull/4707)) [@PointKernel](https://github.com/PointKernel) +- Relax PyTorch upper bound (allowing 2.4) ([#4703](https://github.com/rapidsai/cugraph/pull/4703)) [@jakirkham](https://github.com/jakirkham) +- combine pip install calls in wheel-testing scripts ([#4701](https://github.com/rapidsai/cugraph/pull/4701)) [@jameslamb](https://github.com/jameslamb) +- Merge branch-24.10 into branch-24.12 ([#4697](https://github.com/rapidsai/cugraph/pull/4697)) [@jameslamb](https://github.com/jameslamb) +- Merge branch-24.10 into branch-24.12 ([#4682](https://github.com/rapidsai/cugraph/pull/4682)) [@jameslamb](https://github.com/jameslamb) +- Update all rmm imports to use pylibrmm/librmm ([#4671](https://github.com/rapidsai/cugraph/pull/4671)) [@Matt711](https://github.com/Matt711) +- Prune workflows based on changed files ([#4634](https://github.com/rapidsai/cugraph/pull/4634)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- support heterogenous fanout type ([#4608](https://github.com/rapidsai/cugraph/pull/4608)) [@jnke2016](https://github.com/jnke2016) + # cugraph 24.10.00 (9 Oct 2024) ## 🚨 Breaking Changes From d2f53365d982b28e4eb023824cd7a91bf9c1e281 Mon Sep 17 00:00:00 2001 From: Mike Sarahan Date: Thu, 12 Dec 2024 10:42:12 -0600 Subject: [PATCH 4/4] gate telemetry dispatch calls on TELEMETRY_ENABLED env var (#4816) Because of the switch away from certificates/mTLS, we are having to rework a few things. In the meantime, telemetry jobs are failing. This PR adds a switch to turn all of the telemetry stuff off - to skip it instead. It is meant to be controlled by an org-wide environment variable, which can be applied to individual repos by ops. At the time of submitting this PR, the environment variable is 'false' and no telemetry is being reported. Authors: - Mike Sarahan (https://github.com/msarahan) - Ralph Liu (https://github.com/nv-rliu) Approvers: - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/cugraph/pull/4816 --- .github/workflows/pr.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index c9e41475a1e..a018544c96e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -40,6 +40,7 @@ jobs: OTEL_SERVICE_NAME: "pr-cugraph" steps: - name: Telemetry setup + if: ${{ vars.TELEMETRY_ENABLED == 'true' }} uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main changed-files: secrets: inherit @@ -181,7 +182,7 @@ jobs: telemetry-summarize: runs-on: ubuntu-latest needs: pr-builder - if: always() + if: ${{ vars.TELEMETRY_ENABLED == 'true' && !cancelled() }} continue-on-error: true steps: - name: Load stashed telemetry env vars