diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
new file mode 100644
index 00000000000..3d0ac075be3
--- /dev/null
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,33 @@
+# syntax=docker/dockerfile:1.5
+
+# Build arguments declared before the first FROM. BASE is the image both
+# candidate stages start from; PYTHON_PACKAGE_MANAGER (default: conda) picks
+# which of those stages the final image is built on.
+ARG BASE
+ARG PYTHON_PACKAGE_MANAGER=conda
+
+# Candidate stage selected when PYTHON_PACKAGE_MANAGER=pip.
+FROM ${BASE} as pip-base
+
+ENV DEFAULT_VIRTUAL_ENV=rapids
+
+# Candidate stage selected when PYTHON_PACKAGE_MANAGER=conda.
+FROM ${BASE} as conda-base
+
+ENV DEFAULT_CONDA_ENV=rapids
+
+# Final stage: the stage name expands to "pip-base" or "conda-base".
+FROM ${PYTHON_PACKAGE_MANAGER}-base
+
+ARG CUDA
+ENV CUDAARCHS="RAPIDS"
+# Keep CUDA_VERSION if it is already set and non-empty in this stage;
+# otherwise fall back to the CUDA build argument.
+ENV CUDA_VERSION="${CUDA_VERSION:-${CUDA}}"
+
+# An ARG declared before FROM is only visible to FROM lines, so it is
+# declared again here to make its value available inside this stage.
+ARG PYTHON_PACKAGE_MANAGER
+ENV PYTHON_PACKAGE_MANAGER="${PYTHON_PACKAGE_MANAGER}"
+
+ENV PYTHONSAFEPATH="1"
+ENV PYTHONUNBUFFERED="1"
+ENV PYTHONDONTWRITEBYTECODE="1"
+
+ENV SCCACHE_REGION="us-east-2"
+ENV SCCACHE_BUCKET="rapids-sccache-devs"
+ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai"
+ENV HISTFILE="/home/coder/.cache/._bash_history"
+
+# cugraph_pyg's setup.py needs this defined when building in a conda env
+# NOTE(review): DEFAULT_CONDA_ENV is only set by the conda-base stage. For a
+# pip build the fallback below would expand to "/home/coder/.conda/envs/"
+# unless CUDA_HOME is already set in the base image - confirm this is intended.
+ENV CUDA_HOME="${CUDA_HOME:-/home/coder/.conda/envs/$DEFAULT_CONDA_ENV}"
diff --git a/.devcontainer/README.md b/.devcontainer/README.md
new file mode 100644
index 00000000000..e645c51de8b
--- /dev/null
+++ b/.devcontainer/README.md
@@ -0,0 +1,34 @@
+# cuGraph Development Containers
+
+This directory contains [devcontainer configurations](https://containers.dev/implementors/json_reference/) for using VSCode to [develop in a container](https://code.visualstudio.com/docs/devcontainers/containers) via the `Remote Containers` [extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) or [GitHub Codespaces](https://github.com/codespaces).
+
+This container is a turnkey development environment for building and testing the cuGraph C++ and Python libraries.
+
+## Table of Contents
+
+* [Prerequisites](#prerequisites)
+* [Host bind mounts](#host-bind-mounts)
+* [Launch a Dev Container](#launch-a-dev-container)
+
+## Prerequisites
+
+* [VSCode](https://code.visualstudio.com/download)
+* [VSCode Remote Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)
+
+## Host bind mounts
+
+By default, the following directories are bind-mounted into the devcontainer:
+
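+* `${repo}:/home/coder/cugraph`
+* `${repo}/../.aws:/home/coder/.aws`
+* `${repo}/../.cache:/home/coder/.cache`
+* `${repo}/../.config:/home/coder/.config`
+* conda containers only: `${repo}/../.conda/pkgs:/home/coder/.conda/pkgs` and `${repo}/../.conda/${repo-name}-cuda${version}-envs:/home/coder/.conda/envs`
+* pip containers only: `${repo}/../.local/share/${repo-name}-cuda${version}-venvs:/home/coder/.local/share/venvs`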
+
+This ensures caches, configurations, dependencies, and your commits are persisted on the host across container runs.
+
+## Launch a Dev Container
+
+To launch a devcontainer from VSCode, open the cuGraph repo and select the "Reopen in Container" button that appears in the bottom right.
+
+Alternatively, open the VSCode command palette (typically `cmd/ctrl + shift + P`) and run the "Rebuild and Reopen in Container" command.
diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json
new file mode 100644
index 00000000000..cf4ba5aa114
--- /dev/null
+++ b/.devcontainer/cuda11.8-conda/devcontainer.json
@@ -0,0 +1,37 @@
+{
+ "build": {
+ "context": "${localWorkspaceFolder}/.devcontainer",
+ "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+ "args": {
+ "CUDA": "11.8",
+ "PYTHON_PACKAGE_MANAGER": "conda",
+ "BASE": "rapidsai/devcontainers:23.10-cpp-llvm16-cuda11.8-mambaforge-ubuntu22.04"
+ }
+ },
+ "hostRequirements": {"gpu": "optional"},
+ "features": {
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.10": {}
+ },
+ "overrideFeatureInstallOrder": [
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+ ],
+ "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda11.8-envs}"],
+ "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+ "workspaceFolder": "/home/coder",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cugraph,type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda11.8-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.flake8",
+ "nvidia.nsight-vscode-edition"
+ ]
+ }
+ }
+}
diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json
new file mode 100644
index 00000000000..e86a38abbde
--- /dev/null
+++ b/.devcontainer/cuda11.8-pip/devcontainer.json
@@ -0,0 +1,37 @@
+{
+ "build": {
+ "context": "${localWorkspaceFolder}/.devcontainer",
+ "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+ "args": {
+ "CUDA": "11.8",
+ "PYTHON_PACKAGE_MANAGER": "pip",
+ "BASE": "rapidsai/devcontainers:23.10-cpp-llvm16-cuda11.8-ubuntu22.04"
+ }
+ },
+ "hostRequirements": {"gpu": "optional"},
+ "features": {
+ "ghcr.io/rapidsai/devcontainers/features/ucx:23.10": {"version": "1.14.1"},
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.10": {}
+ },
+ "overrideFeatureInstallOrder": [
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+ ],
+ "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs}"],
+ "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+ "workspaceFolder": "/home/coder",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cugraph,type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda11.8-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.flake8",
+ "nvidia.nsight-vscode-edition"
+ ]
+ }
+ }
+}
diff --git a/.devcontainer/cuda12.0-conda/devcontainer.json b/.devcontainer/cuda12.0-conda/devcontainer.json
new file mode 100644
index 00000000000..863eeea48ff
--- /dev/null
+++ b/.devcontainer/cuda12.0-conda/devcontainer.json
@@ -0,0 +1,37 @@
+{
+ "build": {
+ "context": "${localWorkspaceFolder}/.devcontainer",
+ "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+ "args": {
+ "CUDA": "12.0",
+ "PYTHON_PACKAGE_MANAGER": "conda",
+ "BASE": "rapidsai/devcontainers:23.10-cpp-mambaforge-ubuntu22.04"
+ }
+ },
+ "hostRequirements": {"gpu": "optional"},
+ "features": {
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.10": {}
+ },
+ "overrideFeatureInstallOrder": [
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+ ],
+ "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.0-envs}"],
+ "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+ "workspaceFolder": "/home/coder",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cugraph,type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.conda/pkgs,target=/home/coder/.conda/pkgs,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.conda/${localWorkspaceFolderBasename}-cuda12.0-envs,target=/home/coder/.conda/envs,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.flake8",
+ "nvidia.nsight-vscode-edition"
+ ]
+ }
+ }
+}
diff --git a/.devcontainer/cuda12.0-pip/devcontainer.json b/.devcontainer/cuda12.0-pip/devcontainer.json
new file mode 100644
index 00000000000..c7612771fd3
--- /dev/null
+++ b/.devcontainer/cuda12.0-pip/devcontainer.json
@@ -0,0 +1,37 @@
+{
+ "build": {
+ "context": "${localWorkspaceFolder}/.devcontainer",
+ "dockerfile": "${localWorkspaceFolder}/.devcontainer/Dockerfile",
+ "args": {
+ "CUDA": "12.0",
+ "PYTHON_PACKAGE_MANAGER": "pip",
+ "BASE": "rapidsai/devcontainers:23.10-cpp-llvm16-cuda12.0-ubuntu22.04"
+ }
+ },
+ "hostRequirements": {"gpu": "optional"},
+ "features": {
+ "ghcr.io/rapidsai/devcontainers/features/ucx:23.10": {"version": "1.14.1"},
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:23.10": {}
+ },
+ "overrideFeatureInstallOrder": [
+ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
+ ],
+ "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config/pip,local/share/${localWorkspaceFolderBasename}-cuda12.0-venvs}"],
+ "postAttachCommand": ["/bin/bash", "-c", "if [ ${CODESPACES:-false} = 'true' ]; then . devcontainer-utils-post-attach-command; . rapids-post-attach-command; fi"],
+ "workspaceFolder": "/home/coder",
+ "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/cugraph,type=bind,consistency=consistent",
+ "mounts": [
+ "source=${localWorkspaceFolder}/../.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.config,target=/home/coder/.config,type=bind,consistency=consistent",
+ "source=${localWorkspaceFolder}/../.local/share/${localWorkspaceFolderBasename}-cuda12.0-venvs,target=/home/coder/.local/share/venvs,type=bind,consistency=consistent"
+ ],
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.flake8",
+ "nvidia.nsight-vscode-edition"
+ ]
+ }
+ }
+}
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 02b357c7c88..c01a6fcb94a 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -112,3 +112,23 @@ jobs:
sha: ${{ inputs.sha }}
date: ${{ inputs.date }}
package-name: cugraph
+ wheel-build-nx-cugraph:
+ needs: wheel-publish-pylibcugraph
+ secrets: inherit
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.10
+ with:
+ build_type: ${{ inputs.build_type || 'branch' }}
+ branch: ${{ inputs.branch }}
+ sha: ${{ inputs.sha }}
+ date: ${{ inputs.date }}
+ script: ci/build_wheel_nx-cugraph.sh
+ wheel-publish-nx-cugraph:
+ needs: wheel-build-nx-cugraph
+ secrets: inherit
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-publish.yaml@branch-23.10
+ with:
+ build_type: ${{ inputs.build_type || 'branch' }}
+ branch: ${{ inputs.branch }}
+ sha: ${{ inputs.sha }}
+ date: ${{ inputs.date }}
+ package-name: nx-cugraph
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index d2d24d90fbe..7b267d7edf3 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -23,6 +23,9 @@ jobs:
- wheel-tests-pylibcugraph
- wheel-build-cugraph
- wheel-tests-cugraph
+ - wheel-build-nx-cugraph
+ - wheel-tests-nx-cugraph
+ - devcontainer
secrets: inherit
uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@branch-23.10
checks:
@@ -109,3 +112,26 @@ jobs:
with:
build_type: pull-request
script: ci/test_wheel_cugraph.sh
+ wheel-build-nx-cugraph:
+ needs: wheel-tests-pylibcugraph
+ secrets: inherit
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-build.yaml@branch-23.10
+ with:
+ build_type: pull-request
+ script: ci/build_wheel_nx-cugraph.sh
+ wheel-tests-nx-cugraph:
+ needs: wheel-build-nx-cugraph
+ secrets: inherit
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.10
+ with:
+ build_type: pull-request
+ script: ci/test_wheel_nx-cugraph.sh
+ devcontainer:
+ secrets: inherit
+ uses: rapidsai/shared-action-workflows/.github/workflows/build-in-devcontainer.yaml@branch-23.10
+ with:
+ extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY
+ build_command: |
+ sccache -z;
+ build-all --verbose;
+ sccache -s;
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 97abca71260..dc9ed60b29e 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -48,3 +48,12 @@ jobs:
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
script: ci/test_wheel_cugraph.sh
+ wheel-tests-nx-cugraph:
+ secrets: inherit
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-test.yaml@branch-23.10
+ with:
+ build_type: nightly
+ branch: ${{ inputs.branch }}
+ date: ${{ inputs.date }}
+ sha: ${{ inputs.sha }}
+ script: ci/test_wheel_nx-cugraph.sh
diff --git a/.gitignore b/.gitignore
index 3fda9f8a037..c6bcf6965d7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -97,3 +97,7 @@ python/cugraph/cugraph/tests/dask-worker-space
docs/cugraph/source/api_docs/api/*
_html
_text
+
+# clang tooling
+compile_commands.json
+.clangd/
diff --git a/benchmarks/cugraph-dgl/python-script/dgl_dataloading_benchmark/dgl_benchmark.py b/benchmarks/cugraph-dgl/python-script/dgl_dataloading_benchmark/dgl_benchmark.py
new file mode 100644
index 00000000000..0a52703c546
--- /dev/null
+++ b/benchmarks/cugraph-dgl/python-script/dgl_dataloading_benchmark/dgl_benchmark.py
@@ -0,0 +1,291 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import dgl
+import torch
+import pandas as pd
+import os
+import time
+import json
+import random
+import numpy as np
+from argparse import ArgumentParser
+
+
+def load_edges_from_disk(parquet_path, replication_factor, input_meta):
+    """
+    Load the edge index of every edge type into a graph data dictionary.
+
+    For each key of ``input_meta["num_edges"]`` the file
+    ``{parquet_path}/{edge_type}/edge_index.parquet`` is read. When
+    ``replication_factor`` is greater than 1, replica ``r`` of the edges has
+    its source/destination ids shifted by ``r`` times the node count of the
+    respective node type before all replicas are concatenated.
+
+    Args:
+        parquet_path: Path to the parquet directory.
+        replication_factor: Number of times to replicate the edges.
+        input_meta: Input meta data; its "num_edges" keys name the edge
+            types and "num_nodes" maps node types to node counts.
+    Returns:
+        dict: Dictionary of edge types (tuples obtained by splitting the
+            edge type name on "__") to a tuple of (src, dst) tensors.
+    """
+    graph_data = {}
+    for edge_type in input_meta["num_edges"]:
+        # The trailing space keeps the two message fragments from running
+        # together in the printed line.
+        print(
+            f"Loading edge index for edge type {edge_type} "
+            f"for replication factor = {replication_factor}"
+        )
+        can_edge_type = tuple(edge_type.split("__"))
+        # TODO: Rename `edge_index` to a better name
+        edge_df = pd.read_parquet(
+            os.path.join(parquet_path, edge_type, "edge_index.parquet")
+        )
+        src = torch.from_numpy(edge_df.src.values)
+        dst = torch.from_numpy(edge_df.dst.values)
+        if replication_factor > 1:
+            # Elements 0 and 2 of the split name are the node types whose
+            # node counts give the id offset for each end of the edge.
+            src_offset = input_meta["num_nodes"][can_edge_type[0]]
+            dst_offset = input_meta["num_nodes"][can_edge_type[2]]
+            replicas = range(replication_factor)
+            src = torch.cat([src + r * src_offset for r in replicas])
+            dst = torch.cat([dst + r * dst_offset for r in replicas])
+            src, dst = src.contiguous(), dst.contiguous()
+        graph_data[can_edge_type] = src, dst
+    print("Graph Data compiled")
+    return graph_data
+
+
+def load_node_labels(dataset_path, replication_factor, input_meta):
+    """
+    Load the labels of every node type that has a label file on disk.
+
+    For each node type in ``input_meta["num_nodes"]`` the file
+    ``{dataset_path}/parquet/{node_type}/node_label.parquet`` is looked up.
+    If it exists, its labels are scattered into a float32 tensor that is
+    pre-filled with -1 and covers all replicated nodes; otherwise only the
+    replicated node count is recorded.
+
+    Args:
+        dataset_path: Path to the dataset on disk.
+        replication_factor: Number of times the nodes are replicated.
+        input_meta: Input meta data containing "num_nodes" per node type.
+    Returns:
+        dict: Maps each node type to either {"train_idx", "y"} (label file
+            found) or {"num_nodes"} (no label file).
+    """
+    node_data = {}
+    for node_type, base_num_nodes in input_meta["num_nodes"].items():
+        total_num_nodes = base_num_nodes * replication_factor
+        entry = node_data[node_type] = {}
+        label_path = os.path.join(
+            dataset_path, "parquet", node_type, "node_label.parquet"
+        )
+        if not os.path.exists(label_path):
+            entry["num_nodes"] = total_num_nodes
+            continue
+
+        labels_df = pd.read_parquet(label_path)
+        if replication_factor > 1:
+            # Replica r reuses the labels with node ids shifted by
+            # r * base_num_nodes.
+            replica_ids = range(1, replication_factor)
+            replicated = pd.DataFrame(
+                {
+                    "node": pd.concat(
+                        [
+                            labels_df.node + (r * base_num_nodes)
+                            for r in replica_ids
+                        ]
+                    ),
+                    "label": pd.concat(
+                        [labels_df.label for _ in replica_ids]
+                    ),
+                }
+            )
+            labels_df = pd.concat([labels_df, replicated]).reset_index(
+                drop=True
+            )
+
+        # -1 marks nodes that have no entry in the label file.
+        y = torch.full((total_num_nodes,), -1, dtype=torch.float32)
+        y[torch.as_tensor(labels_df.node.values)] = torch.as_tensor(
+            labels_df.label.values
+        )
+
+        del labels_df
+        entry["train_idx"] = (y > -1).contiguous().nonzero().view(-1)
+        entry["y"] = y.contiguous()
+    return node_data
+
+
+def create_dgl_graph_from_disk(dataset_path, replication_factor=1):
+    """
+    Build a DGL heterograph and per-node-type label data from disk.
+
+    Reads ``meta.json`` from ``dataset_path``, loads the edges stored under
+    its ``parquet`` sub-directory and the node labels, and hands the edges
+    to ``dgl.heterograph``.
+
+    Args:
+        dataset_path: Path to the dataset on disk.
+        replication_factor: Number of times to replicate the edges.
+    Returns:
+        tuple: ``(g, node_data)`` - the DGLGraph built from the loaded edges
+            and the node data returned by ``load_node_labels``.
+    """
+    meta_path = os.path.join(dataset_path, "meta.json")
+    with open(meta_path, "r") as meta_file:
+        input_meta = json.load(meta_file)
+
+    edges_by_type = load_edges_from_disk(
+        os.path.join(dataset_path, "parquet"), replication_factor, input_meta
+    )
+    node_data = load_node_labels(dataset_path, replication_factor, input_meta)
+    return dgl.heterograph(edges_by_type), node_data
+
+
+def create_dataloader(g, train_idx, batch_size, fanouts, use_uva):
+    """
+    Create a DGL neighbor-sampling dataloader for a DGL graph.
+
+    Args:
+        g: DGLGraph to create the dataloader from.
+        train_idx: Dict mapping node type to a tensor of training indices.
+        batch_size: Batch size to use for the dataloader.
+        fanouts: List of fanouts to use for the dataloader.
+        use_uva: Whether to use unified virtual address space.
+    Returns:
+        dgl.dataloading.DataLoader: Unshuffled dataloader over ``train_idx``
+            that keeps the last, possibly smaller, batch.
+    """
+
+    print("Creating dataloader", flush=True)
+    start = time.time()
+    if use_uva:
+        # With UVA enabled the seed indices are moved to the GPU first.
+        train_idx = {
+            node_type: idx.to("cuda") for node_type, idx in train_idx.items()
+        }
+    neighbor_sampler = dgl.dataloading.MultiLayerNeighborSampler(
+        fanouts=fanouts
+    )
+    loader_options = dict(
+        num_workers=0,
+        batch_size=batch_size,
+        use_uva=use_uva,
+        shuffle=False,
+        drop_last=False,
+    )
+    dataloader = dgl.dataloading.DataLoader(
+        g, train_idx, neighbor_sampler, **loader_options
+    )
+    elapsed = time.time() - start
+    print(f"Time to create dataloader = {elapsed:.2f} seconds")
+    return dataloader
+
+
+def dataloading_benchmark(g, train_idx, fanouts, batch_sizes, use_uva):
+    """
+    Time one full pass over a dataloader for every fanout/batch size pair.
+
+    Args:
+        g: DGLGraph
+        train_idx: Training indices handed to ``create_dataloader``.
+        fanouts: List of fanouts to use for the dataloader.
+        batch_sizes: List of batch sizes to use for the dataloader.
+        use_uva: Whether to use unified virtual address space.
+    Returns:
+        list: One dict of timing results per (fanout, batch_size) pair.
+    """
+    results = []
+    for fanout in fanouts:
+        for batch_size in batch_sizes:
+            dataloader = create_dataloader(
+                g,
+                train_idx,
+                batch_size=batch_size,
+                fanouts=fanout,
+                use_uva=use_uva,
+            )
+            epoch_start = time.time()
+            # Only the iteration itself is timed; batches are discarded.
+            for _input_nodes, _output_nodes, _blocks in dataloader:
+                pass
+            epoch_time = time.time() - epoch_start
+            num_batches = len(dataloader)
+            results.append(
+                {
+                    "fanout": fanout,
+                    "batch_size": batch_size,
+                    "dataloading_time_per_epoch": epoch_time,
+                    "dataloading_time_per_batch": epoch_time / num_batches,
+                    "num_edges": g.num_edges(),
+                    "num_batches": num_batches,
+                }
+            )
+
+            print("Dataloading completed")
+            print(f"Fanout = {fanout}, batch_size = {batch_size}")
+            print(
+                f"Time taken {epoch_time:.2f} ",
+                f"seconds for num batches {num_batches}",
+                flush=True,
+            )
+            print("==============================================")
+    return results
+
+def set_seed(seed):
+    """Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators."""
+    seeders = (
+        random.seed,
+        np.random.seed,
+        torch.manual_seed,
+        torch.cuda.manual_seed_all,
+    )
+    for seed_generator in seeders:
+        seed_generator(seed)
+
+if __name__ == "__main__":
+    # List-valued options are passed as comma-separated strings; each fanout
+    # is itself a "_"-separated list of per-layer values.
+    parser = ArgumentParser()
+    parser.add_argument(
+        "--dataset_path", type=str, default="/datasets/abarghi/ogbn_papers100M"
+    )
+    parser.add_argument("--replication_factors", type=str, default="1,2,4,8")
+    parser.add_argument(
+        "--fanouts", type=str, default="25_25,10_10_10,5_10_20"
+    )
+    parser.add_argument("--batch_sizes", type=str, default="512,1024")
+    parser.add_argument("--do_not_use_uva", action="store_true")
+    parser.add_argument("--seed", type=int, default=42)
+    args = parser.parse_args()
+
+    # UVA is used unless it is explicitly switched off.
+    use_uva = not args.do_not_use_uva
+    set_seed(args.seed)
+    replication_factors = [int(x) for x in args.replication_factors.split(",")]
+    fanouts = [[int(y) for y in x.split("_")] for x in args.fanouts.split(",")]
+    batch_sizes = [int(x) for x in args.batch_sizes.split(",")]
+
+    print("Running dgl dataloading benchmark with the following parameters:")
+    print(f"Dataset path = {args.dataset_path}")
+    print(f"Replication factors = {replication_factors}")
+    print(f"Fanouts = {fanouts}")
+    print(f"Batch sizes = {batch_sizes}")
+    print(f"Use UVA = {use_uva}")
+    print("==============================================")
+
+    time_ls = []
+    for replication_factor in replication_factors:
+        st = time.time()
+        g, node_data = create_dgl_graph_from_disk(
+            dataset_path=args.dataset_path,
+            replication_factor=replication_factor,
+        )
+        et = time.time()
+        print(f"Replication factor = {replication_factor}")
+        print(
+            f"G has {g.num_edges()} edges and took",
+            f" {et - st:.2f} seconds to load"
+        )
+        # Only the labelled "paper" nodes are used as seed nodes, so the
+        # dataset must provide a label file for that node type.
+        train_idx = {"paper": node_data["paper"]["train_idx"]}
+        r_time_ls = dataloading_benchmark(
+            g, train_idx, fanouts, batch_sizes, use_uva=use_uva
+        )
+        print(
+            "Benchmark completed for replication factor = ", replication_factor
+        )
+        print("==============================================")
+        # Tag every result row with the replication factor it belongs to
+        for row in r_time_ls:
+            row["replication_factor"] = replication_factor
+        time_ls.extend(r_time_ls)
+
+    # One CSV row per (replication factor, fanout, batch size) combination.
+    df = pd.DataFrame(time_ls)
+    df.to_csv("dgl_dataloading_benchmark.csv", index=False)
+    print("Benchmark completed for all replication factors")
+    print("==============================================")
diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
index 3798d561126..821aa25c1b9 100755
--- a/ci/build_wheel.sh
+++ b/ci/build_wheel.sh
@@ -49,7 +49,11 @@ cd "${package_dir}"
python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check
-mkdir -p final_dist
-python -m auditwheel repair -w final_dist dist/*
-
-RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist
+# pure-python packages should not have auditwheel run on them.
+if [[ ${package_name} == "nx-cugraph" ]]; then
+ RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 dist
+else
+ mkdir -p final_dist
+ python -m auditwheel repair -w final_dist dist/*
+ RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist
+fi
diff --git a/ci/build_wheel_nx-cugraph.sh b/ci/build_wheel_nx-cugraph.sh
new file mode 100755
index 00000000000..4481de1283d
--- /dev/null
+++ b/ci/build_wheel_nx-cugraph.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Copyright (c) 2023, NVIDIA CORPORATION.
+
+# Builds the nx-cugraph wheel by delegating to the shared wheel build script.
+# The arguments are presumably the package name followed by the package
+# directory (the same order ci/test_wheel.sh reads them in) - verify against
+# ci/build_wheel.sh.
+
+# Abort on any error, on use of an unset variable, and on a failing pipeline.
+set -euo pipefail
+
+./ci/build_wheel.sh nx-cugraph python/nx-cugraph
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index bd3aa6bc370..aaeaa715434 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -62,6 +62,7 @@ sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cugr
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cugraph-service/server/cugraph_service_server/__init__.py
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/pylibcugraph/pylibcugraph/__init__.py
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/nx-cugraph/nx_cugraph/__init__.py
+sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/nx-cugraph/_nx_cugraph/__init__.py
# Python pyproject.toml updates
sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cugraph/pyproject.toml
@@ -81,6 +82,9 @@ NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; prin
DEPENDENCIES=(
cudf
cugraph
+ cugraph-dgl
+ cugraph-pyg
+ cugraph-service-server
cugraph-service-client
cuxfilter
dask-cuda
@@ -92,6 +96,7 @@ DEPENDENCIES=(
librmm
pylibcugraph
pylibcugraphops
+ pylibwholegraph
pylibraft
pyraft
raft-dask
@@ -128,3 +133,10 @@ done
sed_runner "s/RAPIDS_VERSION_NUMBER=\".*/RAPIDS_VERSION_NUMBER=\"${NEXT_SHORT_TAG}\"/g" ci/build_docs.sh
sed_runner "s/branch-.*/branch-${NEXT_SHORT_TAG}/g" python/nx-cugraph/README.md
+
+# .devcontainer files
+find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r -d '' filename; do
+ sed_runner "s@rapidsai/devcontainers:[0-9.]*@rapidsai/devcontainers:${NEXT_SHORT_TAG}@g" "${filename}"
+ sed_runner "s@rapidsai/devcontainers/features/ucx:[0-9.]*@rapidsai/devcontainers/features/ucx:${NEXT_SHORT_TAG_PEP440}@" "${filename}"
+ sed_runner "s@rapidsai/devcontainers/features/rapids-build-utils:[0-9.]*@rapidsai/devcontainers/features/rapids-build-utils:${NEXT_SHORT_TAG_PEP440}@" "${filename}"
+done
diff --git a/ci/test_python.sh b/ci/test_python.sh
index 14886909fc9..7b0077991ae 100755
--- a/ci/test_python.sh
+++ b/ci/test_python.sh
@@ -63,10 +63,6 @@ pytest \
tests
popd
-# FIXME: TEMPORARILY disable single-GPU "MG" testing until
-# https://github.com/rapidsai/cugraph/issues/3790 is closed
-# When closed, replace -k "not _mg" with
-# -k "not test_property_graph_mg" \
rapids-logger "pytest cugraph"
pushd python/cugraph/cugraph
export DASK_WORKER_DEVICES="0"
@@ -79,7 +75,7 @@ pytest \
--cov=cugraph \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cugraph-coverage.xml" \
--cov-report=term \
- -k "not _mg" \
+ -k "not test_property_graph_mg" \
tests
popd
diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh
index b62635d08b4..3ac3549f143 100755
--- a/ci/test_wheel.sh
+++ b/ci/test_wheel.sh
@@ -6,19 +6,20 @@ set -eoxu pipefail
package_name=$1
package_dir=$2
+python_package_name=$(echo ${package_name}|sed 's/-/_/g')
+
mkdir -p ./dist
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
-# echo to expand wildcard before adding `[extra]` requires for pip
+# use 'ls' to expand wildcard before adding `[extra]` requires for pip
RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
-python -m pip install $(echo ./dist/${package_name}*.whl)[test]
+# pip creates wheels using python package names
+python -m pip install $(ls ./dist/${python_package_name}*.whl)[test]
# Run smoke tests for aarch64 pull requests
arch=$(uname -m)
if [[ "${arch}" == "aarch64" && ${RAPIDS_BUILD_TYPE} == "pull-request" ]]; then
python ./ci/wheel_smoke_test_${package_name}.py
else
- # FIXME: TEMPORARILY disable single-GPU "MG" testing until
- # https://github.com/rapidsai/cugraph/issues/3790 is closed
- RAPIDS_DATASET_ROOT_DIR=`pwd`/datasets python -m pytest -k "not _mg" ./python/${package_name}/${package_name}/tests
+ RAPIDS_DATASET_ROOT_DIR=`pwd`/datasets python -m pytest ./python/${package_name}/${python_package_name}/tests
fi
diff --git a/ci/test_wheel_cugraph.sh b/ci/test_wheel_cugraph.sh
index 4d511ac2a0f..ac18459128a 100755
--- a/ci/test_wheel_cugraph.sh
+++ b/ci/test_wheel_cugraph.sh
@@ -9,14 +9,6 @@ RAPIDS_PY_WHEEL_NAME="pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-whe
python -m pip install --no-deps ./local-pylibcugraph-dep/pylibcugraph*.whl
-# Always install latest dask for testing
+# Install the pinned dask/distributed release (2023.9.2) for testing
-python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main
-
-# Only download test data for x86
-arch=$(uname -m)
-if [[ "${arch}" == "x86_64" ]]; then
- pushd ./datasets
- bash ./get_test_data.sh
- popd
-fi
+python -m pip install git+https://github.com/dask/dask.git@2023.9.2 git+https://github.com/dask/distributed.git@2023.9.2
./ci/test_wheel.sh cugraph python/cugraph
diff --git a/ci/test_wheel_nx-cugraph.sh b/ci/test_wheel_nx-cugraph.sh
new file mode 100755
index 00000000000..53d40960fc3
--- /dev/null
+++ b/ci/test_wheel_nx-cugraph.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Copyright (c) 2023, NVIDIA CORPORATION.
+
+# Tests the nx-cugraph wheel by delegating to the shared wheel test script,
+# passing the package name followed by the package directory.
+
+# Abort on any error, unset variable or failing pipeline; -x also traces
+# each command as it runs.
+set -eoxu pipefail
+
+./ci/test_wheel.sh nx-cugraph python/nx-cugraph
diff --git a/ci/wheel_smoke_test_nx-cugraph.py b/ci/wheel_smoke_test_nx-cugraph.py
new file mode 100644
index 00000000000..10d26e3aac7
--- /dev/null
+++ b/ci/wheel_smoke_test_nx-cugraph.py
@@ -0,0 +1,38 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+
+import networkx as nx
+import nx_cugraph as nxcg
+
+
+if __name__ == "__main__":
+    # Smoke test: betweenness centrality on a 4-node path graph must agree
+    # between NetworkX and nx_cugraph.
+    G = nx.Graph()
+    G.add_edges_from([(0, 1), (1, 2), (2, 3)])
+
+    nx_result = nx.betweenness_centrality(G)
+    # nx_cugraph is intended to be called via the NetworkX dispatcher, like
+    # this:
+    #   nxcu_result = nx.betweenness_centrality(G, backend="cugraph")
+    #
+    # but here it is being called directly since the NetworkX version that
+    # supports the "backend" kwarg may not be available in the testing env.
+    nxcu_result = nxcg.betweenness_centrality(G)
+
+    # Both results must cover exactly the same nodes ...
+    assert nxcu_result.keys() == nx_result.keys()
+    # ... and agree per node within a relative tolerance.
+    for node_id, nx_bc in nx_result.items():
+        nxcu_bc = nxcu_result[node_id]
+        assert math.isclose(nx_bc, nxcu_bc, rel_tol=1e-6), (
+            f"bc for {node_id=} exceeds tolerance: {nx_bc=}, {nxcu_bc=}"
+        )
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index c66890f8ae5..87179ef892e 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -19,11 +19,11 @@ dependencies:
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.0
-- dask-core>=2023.7.1
+- dask-core==2023.9.2
- dask-cuda==23.10.*
- dask-cudf==23.10.*
-- dask>=2023.7.1
-- distributed>=2023.7.1
+- dask==2023.9.2
+- distributed==2023.9.2
- doxygen
- fsspec>=0.6.0
- gcc_linux-64=11.*
@@ -53,6 +53,7 @@ dependencies:
- pydata-sphinx-theme
- pylibcugraphops==23.10.*
- pylibraft==23.10.*
+- pylibwholegraph==23.10.*
- pytest
- pytest-benchmark
- pytest-cov
@@ -66,10 +67,12 @@ dependencies:
- scikit-build>=0.13.1
- scikit-learn>=0.23.1
- scipy
+- setuptools>=61.0.0
- sphinx-copybutton
- sphinx-markdown-tables
- sphinx<6
- sphinxcontrib-websupport
- ucx-proc=*=gpu
- ucx-py==0.34.*
+- wheel
name: all_cuda-118_arch-x86_64
diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml
index 3afb1415572..d54dc0abf51 100644
--- a/conda/environments/all_cuda-120_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -19,11 +19,11 @@ dependencies:
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.0
-- dask-core>=2023.7.1
+- dask-core==2023.9.2
- dask-cuda==23.10.*
- dask-cudf==23.10.*
-- dask>=2023.7.1
-- distributed>=2023.7.1
+- dask==2023.9.2
+- distributed==2023.9.2
- doxygen
- fsspec>=0.6.0
- gcc_linux-64=11.*
@@ -52,6 +52,7 @@ dependencies:
- pydata-sphinx-theme
- pylibcugraphops==23.10.*
- pylibraft==23.10.*
+- pylibwholegraph==23.10.*
- pytest
- pytest-benchmark
- pytest-cov
@@ -65,10 +66,12 @@ dependencies:
- scikit-build>=0.13.1
- scikit-learn>=0.23.1
- scipy
+- setuptools>=61.0.0
- sphinx-copybutton
- sphinx-markdown-tables
- sphinx<6
- sphinxcontrib-websupport
- ucx-proc=*=gpu
- ucx-py==0.34.*
+- wheel
name: all_cuda-120_arch-x86_64
diff --git a/conda/recipes/cugraph-pyg/meta.yaml b/conda/recipes/cugraph-pyg/meta.yaml
index 2d7ed2f4cda..1dc5a75c41b 100644
--- a/conda/recipes/cugraph-pyg/meta.yaml
+++ b/conda/recipes/cugraph-pyg/meta.yaml
@@ -26,7 +26,7 @@ requirements:
- python
- scikit-build >=0.13.1
run:
- - distributed >=2023.7.1
+ - distributed ==2023.9.2
- numba >=0.57
- numpy >=1.21
- python
diff --git a/conda/recipes/cugraph-service/meta.yaml b/conda/recipes/cugraph-service/meta.yaml
index f3229c27364..2daf0438351 100644
--- a/conda/recipes/cugraph-service/meta.yaml
+++ b/conda/recipes/cugraph-service/meta.yaml
@@ -59,7 +59,7 @@ outputs:
- cupy >=12.0.0
- dask-cuda ={{ minor_version }}
- dask-cudf ={{ minor_version }}
- - distributed >=2023.7.1
+ - distributed ==2023.9.2
- numba >=0.57
- numpy >=1.21
- python
diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml
index ad5965ad20c..f9bf54a2ef4 100644
--- a/conda/recipes/cugraph/meta.yaml
+++ b/conda/recipes/cugraph/meta.yaml
@@ -76,9 +76,9 @@ requirements:
- cupy >=12.0.0
- dask-cuda ={{ minor_version }}
- dask-cudf ={{ minor_version }}
- - dask >=2023.7.1
- - dask-core >=2023.7.1
- - distributed >=2023.7.1
+ - dask ==2023.9.2
+ - dask-core ==2023.9.2
+ - distributed ==2023.9.2
- fsspec>=0.6.0
- libcugraph ={{ version }}
- pylibcugraph ={{ version }}
diff --git a/cpp/.clangd b/cpp/.clangd
new file mode 100644
index 00000000000..7c4fe036ddf
--- /dev/null
+++ b/cpp/.clangd
@@ -0,0 +1,65 @@
+# https://clangd.llvm.org/config
+
+# Apply a config conditionally to all C files
+If:
+ PathMatch: .*\.(c|h)$
+
+---
+
+# Apply a config conditionally to all C++ files
+If:
+ PathMatch: .*\.(c|h)pp
+
+---
+
+# Apply a config conditionally to all CUDA files
+If:
+ PathMatch: .*\.cuh?
+CompileFlags:
+ Add:
+ - "-x"
+ - "cuda"
+ # No error on unknown CUDA versions
+ - "-Wno-unknown-cuda-version"
+ # Allow variadic CUDA functions
+ - "-Xclang=-fcuda-allow-variadic-functions"
+Diagnostics:
+ Suppress:
+ - "variadic_device_fn"
+ - "attributes_not_allowed"
+
+---
+
+# Tweak the clangd parse settings for all files
+CompileFlags:
+ Add:
+ # report all errors
+ - "-ferror-limit=0"
+ - "-fmacro-backtrace-limit=0"
+ - "-ftemplate-backtrace-limit=0"
+ # Skip the CUDA version check
+ - "--no-cuda-version-check"
+ Remove:
+ # remove gcc's -fcoroutines
+ - -fcoroutines
+ # remove nvc++ flags unknown to clang
+ - "-gpu=*"
+ - "-stdpar*"
+ # remove nvcc flags unknown to clang
+ - "-arch*"
+ - "-gencode*"
+ - "--generate-code*"
+ - "-ccbin*"
+ - "-t=*"
+ - "--threads*"
+ - "-Xptxas*"
+ - "-Xcudafe*"
+ - "-Xfatbin*"
+ - "-Xcompiler*"
+ - "--diag-suppress*"
+ - "--diag_suppress*"
+ - "--compiler-options*"
+ - "--expt-extended-lambda"
+ - "--expt-relaxed-constexpr"
+ - "-forward-unknown-to-host-compiler"
+ - "-Werror=cross-execution-space-call"
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index c5923109fe5..73025777959 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -437,6 +437,7 @@ add_library(cugraph_c
src/c_api/weakly_connected_components.cpp
src/c_api/strongly_connected_components.cpp
src/c_api/replicate_edgelist.cpp
+ src/c_api/legacy_k_truss.cpp
)
add_library(cugraph::cugraph_c ALIAS cugraph_c)
diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp
index b624ec5c0e0..78846bc5766 100644
--- a/cpp/include/cugraph/algorithms.hpp
+++ b/cpp/include/cugraph/algorithms.hpp
@@ -430,34 +430,39 @@ void connected_components(legacy::GraphCSRView const& graph,
VT* labels);
/**
- * @brief Compute k truss for a graph
+ * @brief Compute k truss for a graph ** temporary
*
* K Truss is the maximal subgraph of a graph which contains at least three
* vertices where every edge is incident to at least k-2 triangles.
*
- * Note that current implementation does not support a weighted graph.
+ * This version is a temporary solution to clean up python integration through the C API.
*
- * @throws cugraph::logic_error with a custom message when an error
- * occurs.
+ * This version only supports single-GPU (SG) graphs.
*
- * @tparam VT Type of vertex identifiers. Supported value : int (signed,
- * 32-bit)
- * @tparam ET Type of edge identifiers. Supported value : int (signed,
- * 32-bit)
- * @tparam WT Type of edge weights. Supported values : float or double.
+ * @throws cugraph::logic_error with a custom message when an error
+ * occurs.
*
- * @param[in] graph cuGraph graph descriptor, should contain the connectivity
- * information as a COO
- * @param[in] k The order of the truss
- * @param[in] mr Memory resource used to allocate the returned graph
- * @return Unique pointer to K Truss subgraph in COO format
+ * @tparam vertex_t Type of vertex identifiers. Supported value : int (signed, 32-bit)
+ * @tparam weight_t Type of edge weights. Supported values : float or double.
*
+ * @param[in] handle Library handle (RAFT).
+ * @param[in] src Source vertices from COO
+ * @param[in] dst Destination vertices from COO
+ * @param[in] wgt Optional edge weights from COO
+ * @param[in] k The order of the truss
+ * @return Tuple containing extracted src, dst and optional weights for the
+ * subgraph
*/
-template
-std::unique_ptr> k_truss_subgraph(
- legacy::GraphCOOView const& graph,
- int k,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+template
+std::tuple,
+ rmm::device_uvector,
+ std::optional>>
+k_truss_subgraph(raft::handle_t const& handle,
+ raft::device_span src,
+ raft::device_span dst,
+ std::optional> wgt,
+ size_t number_of_vertices,
+ int k);
// FIXME: Internally distances is of int (signed 32-bit) data type, but current
// template uses data from VT, ET, WT from the legacy::GraphCSR View even if weights
diff --git a/cpp/include/cugraph/sampling_functions.hpp b/cpp/include/cugraph/sampling_functions.hpp
index e42ef9bfcf3..75cf8f91f92 100644
--- a/cpp/include/cugraph/sampling_functions.hpp
+++ b/cpp/include/cugraph/sampling_functions.hpp
@@ -103,7 +103,7 @@ namespace cugraph {
* std::get<1>(*edgelist_label_offsets) if @p edgelist_label_offsets.has_value() is true and 1
* otherwise and # hops = std::get<1>(*edgelist_hops) if edgelist_hops.has_value() is true and 1
* otherwise, valid only if at least one of @p edgelist_label_offsets.has_value() or @p
- * edgelist_hops.has_value() is rue), renumber_map to query original vertices (size = # unique
+ * edgelist_hops.has_value() is true), renumber_map to query original vertices (size = # unique
* vertices or aggregate # unique vertices for every label), and label offsets to the renumber_map
* (size = std::get<1>(*edgelist_label_offsets) + 1, valid only if @p
* edgelist_label_offsets.has_value() is true).
diff --git a/cpp/include/cugraph/utilities/graph_traits.hpp b/cpp/include/cugraph/utilities/graph_traits.hpp
index 7385630c011..e2737305aed 100644
--- a/cpp/include/cugraph/utilities/graph_traits.hpp
+++ b/cpp/include/cugraph/utilities/graph_traits.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -47,6 +47,16 @@ struct is_vertex_edge_combo {
(sizeof(vertex_t) <= sizeof(edge_t));
};
+// meta-function that constrains
+// vertex_t and edge_t template param candidates to only int32_t:
+//
+template
+struct is_vertex_edge_combo_legacy {
+ static constexpr bool value = is_one_of::value &&
+ is_one_of::value &&
+ (sizeof(vertex_t) <= sizeof(edge_t));
+};
+
// meta-function that constrains
// all 3 template param candidates:
//
@@ -56,4 +66,14 @@ struct is_candidate {
is_vertex_edge_combo::value && is_one_of::value;
};
+// meta-function that constrains
+// all 3 template param candidates where vertex_t and edge_t
+// are restricted to int32_t:
+//
+template
+struct is_candidate_legacy {
+ static constexpr bool value = is_vertex_edge_combo_legacy::value &&
+ is_one_of::value;
+};
+
} // namespace cugraph
diff --git a/cpp/include/cugraph_c/community_algorithms.h b/cpp/include/cugraph_c/community_algorithms.h
index e938c77cccd..8f1015f8632 100644
--- a/cpp/include/cugraph_c/community_algorithms.h
+++ b/cpp/include/cugraph_c/community_algorithms.h
@@ -227,6 +227,27 @@ cugraph_error_code_t cugraph_extract_ego(
cugraph_induced_subgraph_result_t** result,
cugraph_error_t** error);
+/**
+ * @brief Extract k truss for a graph
+ *
+ * @param [in] handle Handle for accessing resources
+ * @param [in] graph Pointer to graph. NOTE: Graph might be modified if the storage
+ * needs to be transposed
+ * @param [in] k The order of the truss
+ * @param [in] do_expensive_check
+ * A flag to run expensive checks for input arguments (if set to true)
+ * @param [out] result Opaque object containing the extracted subgraph
+ * @param [out] error Pointer to an error object storing details of any error. Will
+ * be populated if error code is not CUGRAPH_SUCCESS
+ * @return error code
+ */
+cugraph_error_code_t cugraph_k_truss_subgraph(const cugraph_resource_handle_t* handle,
+ cugraph_graph_t* graph,
+ size_t k,
+ bool_t do_expensive_check,
+ cugraph_induced_subgraph_result_t** result,
+ cugraph_error_t** error);
+
/**
* @brief Opaque clustering output
*/
diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h
index 37124d100dd..92fe50ef622 100644
--- a/cpp/include/cugraph_c/sampling_algorithms.h
+++ b/cpp/include/cugraph_c/sampling_algorithms.h
@@ -205,6 +205,21 @@ typedef enum cugraph_prior_sources_behavior_t {
but exclude any vertex that has already been used as a source */
} cugraph_prior_sources_behavior_t;
+/**
+ * @brief Selects the type of compression to use for the output samples.
+ */
+typedef enum cugraph_compression_type_t {
+ COO = 0, /** Outputs in COO format. Default. */
+ CSR, /** Compresses in CSR format. This means the row (src) column
+ is compressed into a row pointer. */
+ CSC, /** Compresses in CSC format. This means the col (dst) column
+ is compressed into a column pointer. */
+ DCSR, /** Compresses in DCSR format. This outputs an additional index
+ that avoids empty entries in the row pointer. */
+ DCSC /** Compresses in DCSC format. This outputs an additional index
+ that avoids empty entries in the col pointer. */
+} cugraph_compression_type_t;
+
/**
* @brief Create sampling options object
*
@@ -225,6 +240,14 @@ cugraph_error_code_t cugraph_sampling_options_create(cugraph_sampling_options_t*
*/
void cugraph_sampling_set_renumber_results(cugraph_sampling_options_t* options, bool_t value);
+/**
+ * @brief Set whether to compress per-hop (True) or globally (False)
+ *
+ * @param options - opaque pointer to the sampling options
+ * @param value - Boolean value to assign to the option
+ */
+void cugraph_sampling_set_compress_per_hop(cugraph_sampling_options_t* options, bool_t value);
+
/**
* @brief Set flag to sample with_replacement
*
@@ -241,6 +264,15 @@ void cugraph_sampling_set_with_replacement(cugraph_sampling_options_t* options,
*/
void cugraph_sampling_set_return_hops(cugraph_sampling_options_t* options, bool_t value);
+/**
+ * @brief Set compression type
+ *
+ * @param options - opaque pointer to the sampling options
+ * @param value - Enum defining the compression type
+ */
+void cugraph_sampling_set_compression_type(cugraph_sampling_options_t* options,
+ cugraph_compression_type_t value);
+
/**
* @brief Set prior sources behavior
*
@@ -265,62 +297,6 @@ void cugraph_sampling_set_dedupe_sources(cugraph_sampling_options_t* options, bo
*/
void cugraph_sampling_options_free(cugraph_sampling_options_t* options);
-/**
- * @brief Uniform Neighborhood Sampling
- * @deprecated This call should be replaced with cugraph_uniform_neighbor_sample
- *
- * Returns a sample of the neighborhood around specified start vertices. Optionally, each
- * start vertex can be associated with a label, allowing the caller to specify multiple batches
- * of sampling requests in the same function call - which should improve GPU utilization.
- *
- * If label is NULL then all start vertices will be considered part of the same batch and the
- * return value will not have a label column.
- *
- * @param [in] handle Handle for accessing resources
- * @param [in] graph Pointer to graph. NOTE: Graph might be modified if the storage
- * needs to be transposed
- * @param [in] start_vertices Device array of start vertices for the sampling
- * @param [in] start_vertex_labels Device array of start vertex labels for the sampling. The
- * labels associated with each start vertex will be included in the output associated with results
- * that were derived from that start vertex. We only support label of type INT32. If label is
- * NULL, the return data will not be labeled.
- * @param [in] label_list Device array of the labels included in @p start_vertex_labels. If
- * @p label_to_comm_rank is not specified this parameter is ignored. If specified, label_list
- * must be sorted in ascending order.
- * @param [in] label_to_comm_rank Device array identifying which comm rank the output for a
- * particular label should be shuffled in the output. If not specifed the data is not organized in
- * output. If specified then the all data from @p label_list[i] will be shuffled to rank @p
- * label_to_comm_rank[i]. If not specified then the output data will not be shuffled between ranks.
- * @param [in] fanout Host array defining the fan out at each step in the sampling algorithm.
- * We only support fanout values of type INT32
- * @param [in/out] rng_state State of the random number generator, updated with each call
- * @param [in] with_replacement
- * Boolean value. If true selection of edges is done with
- * replacement. If false selection is done without replacement.
- * @param [in] return_hops Boolean value. If true include the hop number in the result,
- * If false the hop number will not be included in result.
- * @param [in] do_expensive_check
- * A flag to run expensive checks for input arguments (if set to true)
- * @param [in] result Output from the uniform_neighbor_sample call
- * @param [out] error Pointer to an error object storing details of any error. Will
- * be populated if error code is not CUGRAPH_SUCCESS
- * @return error code
- */
-cugraph_error_code_t cugraph_uniform_neighbor_sample_with_edge_properties(
- const cugraph_resource_handle_t* handle,
- cugraph_graph_t* graph,
- const cugraph_type_erased_device_array_view_t* start_vertices,
- const cugraph_type_erased_device_array_view_t* start_vertex_labels,
- const cugraph_type_erased_device_array_view_t* label_list,
- const cugraph_type_erased_device_array_view_t* label_to_comm_rank,
- const cugraph_type_erased_host_array_view_t* fan_out,
- cugraph_rng_state_t* rng_state,
- bool_t with_replacement,
- bool_t return_hops,
- bool_t do_expensive_check,
- cugraph_sample_result_t** result,
- cugraph_error_t** error);
-
/**
* @brief Uniform Neighborhood Sampling
*
@@ -374,6 +350,7 @@ cugraph_error_code_t cugraph_uniform_neighbor_sample(
cugraph_error_t** error);
/**
+ * @deprecated This call should be replaced with cugraph_sample_result_get_majors
* @brief Get the source vertices from the sampling algorithm result
*
* @param [in] result The result from a sampling algorithm
@@ -383,6 +360,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_sources(
const cugraph_sample_result_t* result);
/**
+ * @deprecated This call should be replaced with cugraph_sample_result_get_minors
* @brief Get the destination vertices from the sampling algorithm result
*
* @param [in] result The result from a sampling algorithm
@@ -391,6 +369,33 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_sources(
cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_destinations(
const cugraph_sample_result_t* result);
+/**
+ * @brief Get the major vertices from the sampling algorithm result
+ *
+ * @param [in] result The result from a sampling algorithm
+ * @return type erased array pointing to the major vertices in device memory
+ */
+cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_majors(
+ const cugraph_sample_result_t* result);
+
+/**
+ * @brief Get the minor vertices from the sampling algorithm result
+ *
+ * @param [in] result The result from a sampling algorithm
+ * @return type erased array pointing to the minor vertices in device memory
+ */
+cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_minors(
+ const cugraph_sample_result_t* result);
+
+/**
+ * @brief Get the major offsets from the sampling algorithm result
+ *
+ * @param [in] result The result from a sampling algorithm
+ * @return type erased array pointing to the major offsets in device memory
+ */
+cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_major_offsets(
+ const cugraph_sample_result_t* result);
+
/**
* @brief Get the start labels from the sampling algorithm result
*
@@ -436,6 +441,15 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_edge_weight(
cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_hop(
const cugraph_sample_result_t* result);
+/**
+ * @brief Get the label-hop offsets from the sampling algorithm result
+ *
+ * @param [in] result The result from a sampling algorithm
+ * @return type erased array pointing to the label-hop offsets
+ */
+cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_label_hop_offsets(
+ const cugraph_sample_result_t* result);
+
/**
* @brief Get the index from the sampling algorithm result
*
@@ -446,6 +460,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_index(
const cugraph_sample_result_t* result);
/**
+ * @deprecated This call should be replaced with cugraph_sample_result_get_label_hop_offsets
* @brief Get the result offsets from the sampling algorithm result
*
* @param [in] result The result from a sampling algorithm
diff --git a/cpp/src/c_api/legacy_k_truss.cpp b/cpp/src/c_api/legacy_k_truss.cpp
new file mode 100644
index 00000000000..90e0894783a
--- /dev/null
+++ b/cpp/src/c_api/legacy_k_truss.cpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+
+namespace {
+
+struct k_truss_functor : public cugraph::c_api::abstract_functor {  // C API functor: decompress graph to COO, run legacy k_truss_subgraph, wrap the result
+ raft::handle_t const& handle_;  // RAFT handle unwrapped from the C resource handle
+ cugraph::c_api::cugraph_graph_t* graph_;  // C API graph wrapper the algorithm runs on
+ size_t k_;  // order of the truss, forwarded to cugraph::k_truss_subgraph
+ bool do_expensive_check_;  // forwarded to decompress_to_edgelist / unrenumber_int_vertices
+ cugraph::c_api::cugraph_induced_subgraph_result_t* result_{};  // allocated by operator() in the supported branch only
+
+ k_truss_functor(::cugraph_resource_handle_t const* handle,
+ ::cugraph_graph_t* graph,
+ size_t k,
+ bool do_expensive_check)
+ : abstract_functor(),
+ handle_(*reinterpret_cast(handle)->handle_),
+ graph_(reinterpret_cast(graph)),
+ k_(k),
+ do_expensive_check_(do_expensive_check)
+ {
+ }
+
+ template
+ void operator()()
+ {
+ if constexpr (!cugraph::is_candidate_legacy::value) {  // type combination rejected by the legacy constraint
+ unsupported();
+ } else if constexpr (multi_gpu) {  // multi-GPU graphs are not handled by this functor
+ unsupported();
+ } else {
+ // k_truss expects store_transposed == false
+ if constexpr (store_transposed) {
+ error_code_ = cugraph::c_api::
+ transpose_storage(
+ handle_, graph_, error_.get());
+ if (error_code_ != CUGRAPH_SUCCESS) return;  // leave result_ unset when the transpose fails
+ }
+
+ auto graph =
+ reinterpret_cast*>(graph_->graph_);
+
+ auto edge_weights = reinterpret_cast<
+ cugraph::edge_property_t,
+ weight_t>*>(graph_->edge_weights_);
+
+ auto number_map = reinterpret_cast*>(graph_->number_map_);
+
+ auto graph_view = graph->view();
+ rmm::device_uvector src(0, handle_.get_stream());
+ rmm::device_uvector dst(0, handle_.get_stream());
+ std::optional> wgt{std::nullopt};
+
+ std::tie(src, dst, wgt, std::ignore) = cugraph::decompress_to_edgelist(  // edge list (and weights, if any) in COO form
+ handle_,
+ graph_view,
+ edge_weights ? std::make_optional(edge_weights->view()) : std::nullopt,
+ std::optional>{std::nullopt},
+ std::optional>(std::nullopt),
+ do_expensive_check_);
+
+ auto [result_src, result_dst, result_wgt] = cugraph::k_truss_subgraph(
+ handle_,
+ raft::device_span(src.data(), src.size()),
+ raft::device_span(dst.data(), dst.size()),
+ wgt ? std::make_optional(raft::device_span(wgt->data(), wgt->size()))
+ : std::nullopt,
+ graph_view.number_of_vertices(),
+ k_);
+
+ cugraph::unrenumber_int_vertices(  // map result sources back through number_map
+ handle_,
+ result_src.data(),
+ result_src.size(),
+ number_map->data(),
+ graph_view.vertex_partition_range_lasts(),
+ do_expensive_check_);
+
+ cugraph::unrenumber_int_vertices(  // map result destinations back through number_map
+ handle_,
+ result_dst.data(),
+ result_dst.size(),
+ number_map->data(),
+ graph_view.vertex_partition_range_lasts(),
+ do_expensive_check_);
+
+ rmm::device_uvector edge_offsets(2, handle_.get_stream());
+ std::vector h_edge_offsets{{0, result_src.size()}};  // one subgraph: offsets [0, number of result edges]
+ raft::update_device(
+ edge_offsets.data(), h_edge_offsets.data(), h_edge_offsets.size(), handle_.get_stream());
+
+ result_ = new cugraph::c_api::cugraph_induced_subgraph_result_t{
+ new cugraph::c_api::cugraph_type_erased_device_array_t(result_src, graph_->vertex_type_),
+ new cugraph::c_api::cugraph_type_erased_device_array_t(result_dst, graph_->vertex_type_),
+ result_wgt ? new cugraph::c_api::cugraph_type_erased_device_array_t(*result_wgt,
+ graph_->weight_type_)
+ : NULL,  // guard tests result_wgt, the optional that is dereferenced, not the input wgt
+ new cugraph::c_api::cugraph_type_erased_device_array_t(edge_offsets,
+ cugraph_data_type_id_t::SIZE_T)};
+ }
+ }
+};
+
+} // namespace
+
+extern "C" cugraph_error_code_t cugraph_k_truss_subgraph(const cugraph_resource_handle_t* handle,  // C API entry point for k-truss subgraph extraction
+ cugraph_graph_t* graph,
+ size_t k,
+ bool_t do_expensive_check,
+ cugraph_induced_subgraph_result_t** result,
+ cugraph_error_t** error)
+{
+ k_truss_functor functor(handle, graph, k, do_expensive_check);  // bundle the arguments into the functor defined above
+
+ return cugraph::c_api::run_algorithm(graph, functor, result, error);  // presumably dispatches the functor on the graph's runtime types and fills result/error; confirm in run_algorithm
+}
diff --git a/cpp/src/c_api/uniform_neighbor_sampling.cpp b/cpp/src/c_api/uniform_neighbor_sampling.cpp
index f146c331d8c..1a53c899109 100644
--- a/cpp/src/c_api/uniform_neighbor_sampling.cpp
+++ b/cpp/src/c_api/uniform_neighbor_sampling.cpp
@@ -38,17 +38,20 @@ struct cugraph_sampling_options_t {
prior_sources_behavior_t prior_sources_behavior_{prior_sources_behavior_t::DEFAULT};
bool_t dedupe_sources_{FALSE};
bool_t renumber_results_{FALSE};
+ cugraph_compression_type_t compression_type_{cugraph_compression_type_t::COO};
+ bool_t compress_per_hop_{FALSE};
};
struct cugraph_sample_result_t {
- cugraph_type_erased_device_array_t* src_{nullptr};
- cugraph_type_erased_device_array_t* dst_{nullptr};
+ cugraph_type_erased_device_array_t* major_offsets_{nullptr};
+ cugraph_type_erased_device_array_t* majors_{nullptr};
+ cugraph_type_erased_device_array_t* minors_{nullptr};
cugraph_type_erased_device_array_t* edge_id_{nullptr};
cugraph_type_erased_device_array_t* edge_type_{nullptr};
cugraph_type_erased_device_array_t* wgt_{nullptr};
cugraph_type_erased_device_array_t* hop_{nullptr};
+ cugraph_type_erased_device_array_t* label_hop_offsets_{nullptr};
cugraph_type_erased_device_array_t* label_{nullptr};
- cugraph_type_erased_device_array_t* offsets_{nullptr};
cugraph_type_erased_device_array_t* renumber_map_{nullptr};
cugraph_type_erased_device_array_t* renumber_map_offsets_{nullptr};
};
@@ -186,6 +189,8 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct
graph_view.local_vertex_partition_range_last(),
do_expensive_check_);
+ bool has_labels = start_vertex_labels_ != nullptr;
+
auto&& [src, dst, wgt, edge_id, edge_type, hop, edge_label, offsets] =
cugraph::uniform_neighbor_sample(
handle_,
@@ -229,25 +234,130 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct
vertex_partition_lasts,
do_expensive_check_);
+ std::optional> majors{std::nullopt};
+ rmm::device_uvector minors(0, handle_.get_stream());
+ std::optional> major_offsets{std::nullopt};
+
+ std::optional> label_hop_offsets{std::nullopt};
+
std::optional> renumber_map{std::nullopt};
std::optional> renumber_map_offsets{std::nullopt};
+ bool src_is_major = (options_.compression_type_ == cugraph_compression_type_t::CSR) ||
+ (options_.compression_type_ == cugraph_compression_type_t::DCSR) ||
+ (options_.compression_type_ == cugraph_compression_type_t::COO);
+
if (options_.renumber_results_) {
- std::tie(src, dst, renumber_map, renumber_map_offsets) = cugraph::renumber_sampled_edgelist(
- handle_,
- std::move(src),
- std::move(dst),
- hop ? std::make_optional(raft::device_span{hop->data(), hop->size()})
- : std::nullopt,
- std::make_optional(std::make_tuple(
- raft::device_span{edge_label->data(), edge_label->size()},
- raft::device_span{offsets->data(), offsets->size()})),
- do_expensive_check_);
+ if (options_.compression_type_ == cugraph_compression_type_t::COO) {
+ // COO
+
+ rmm::device_uvector output_majors(0, handle_.get_stream());
+ rmm::device_uvector output_renumber_map(0, handle_.get_stream());
+ std::tie(output_majors,
+ minors,
+ wgt,
+ edge_id,
+ edge_type,
+ label_hop_offsets,
+ output_renumber_map,
+ renumber_map_offsets) =
+ cugraph::renumber_and_sort_sampled_edgelist(
+ handle_,
+ std::move(src),
+ std::move(dst),
+ wgt ? std::move(wgt) : std::nullopt,
+ edge_id ? std::move(edge_id) : std::nullopt,
+ edge_type ? std::move(edge_type) : std::nullopt,
+ hop ? std::make_optional(std::make_tuple(std::move(*hop), fan_out_->size_))
+ : std::nullopt,
+ offsets ? std::make_optional(std::make_tuple(
+ raft::device_span{offsets->data(), offsets->size()},
+ edge_label->size()))
+ : std::nullopt,
+ src_is_major,
+ do_expensive_check_);
+
+ majors.emplace(std::move(output_majors));
+ renumber_map.emplace(std::move(output_renumber_map));
+ } else {
+ // (D)CSC, (D)CSR
+
+ bool doubly_compress = (options_.compression_type_ == cugraph_compression_type_t::DCSR) ||
+ (options_.compression_type_ == cugraph_compression_type_t::DCSC);
+
+ rmm::device_uvector output_major_offsets(0, handle_.get_stream());
+ rmm::device_uvector output_renumber_map(0, handle_.get_stream());
+ std::tie(majors,
+ output_major_offsets,
+ minors,
+ wgt,
+ edge_id,
+ edge_type,
+ label_hop_offsets,
+ output_renumber_map,
+ renumber_map_offsets) =
+ cugraph::renumber_and_compress_sampled_edgelist(
+ handle_,
+ std::move(src),
+ std::move(dst),
+ wgt ? std::move(wgt) : std::nullopt,
+ edge_id ? std::move(edge_id) : std::nullopt,
+ edge_type ? std::move(edge_type) : std::nullopt,
+ hop ? std::make_optional(std::make_tuple(std::move(*hop), fan_out_->size_))
+ : std::nullopt,
+ offsets ? std::make_optional(std::make_tuple(
+ raft::device_span{offsets->data(), offsets->size()},
+ edge_label->size()))
+ : std::nullopt,
+ src_is_major,
+ options_.compress_per_hop_,
+ doubly_compress,
+ do_expensive_check_);
+
+ renumber_map.emplace(std::move(output_renumber_map));
+ major_offsets.emplace(std::move(output_major_offsets));
+ }
+
+ // These are now represented by label_hop_offsets
+ hop.reset();
+ offsets.reset();
+ } else {
+ if (options_.compression_type_ != cugraph_compression_type_t::COO) {
+ CUGRAPH_FAIL("Can only use COO format if not renumbering");
+ }
+
+ std::tie(src, dst, wgt, edge_id, edge_type, label_hop_offsets) =
+ cugraph::sort_sampled_edgelist(
+ handle_,
+ std::move(src),
+ std::move(dst),
+ wgt ? std::move(wgt) : std::nullopt,
+ edge_id ? std::move(edge_id) : std::nullopt,
+ edge_type ? std::move(edge_type) : std::nullopt,
+ hop ? std::make_optional(std::make_tuple(std::move(*hop), fan_out_->size_))
+ : std::nullopt,
+ offsets ? std::make_optional(std::make_tuple(
+ raft::device_span{offsets->data(), offsets->size()},
+ edge_label->size()))
+ : std::nullopt,
+ src_is_major,
+ do_expensive_check_);
+
+ majors.emplace(std::move(src));
+ minors = std::move(dst);
+
+ hop.reset();
+ offsets.reset();
}
result_ = new cugraph::c_api::cugraph_sample_result_t{
- new cugraph::c_api::cugraph_type_erased_device_array_t(src, graph_->vertex_type_),
- new cugraph::c_api::cugraph_type_erased_device_array_t(dst, graph_->vertex_type_),
+ (major_offsets)
+ ? new cugraph::c_api::cugraph_type_erased_device_array_t(*major_offsets, SIZE_T)
+ : nullptr,
+ (majors)
+ ? new cugraph::c_api::cugraph_type_erased_device_array_t(*majors, graph_->vertex_type_)
+ : nullptr,
+ new cugraph::c_api::cugraph_type_erased_device_array_t(minors, graph_->vertex_type_),
(edge_id)
? new cugraph::c_api::cugraph_type_erased_device_array_t(*edge_id, graph_->edge_type_)
: nullptr,
@@ -256,12 +366,14 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct
: nullptr,
(wgt) ? new cugraph::c_api::cugraph_type_erased_device_array_t(*wgt, graph_->weight_type_)
: nullptr,
- (hop) ? new cugraph::c_api::cugraph_type_erased_device_array_t(*hop, INT32) : nullptr,
+ (hop) ? new cugraph::c_api::cugraph_type_erased_device_array_t(*hop, INT32)
+ : nullptr, // FIXME get rid of this
+ (label_hop_offsets)
+ ? new cugraph::c_api::cugraph_type_erased_device_array_t(*label_hop_offsets, SIZE_T)
+ : nullptr,
(edge_label)
? new cugraph::c_api::cugraph_type_erased_device_array_t(edge_label.value(), INT32)
: nullptr,
- (offsets) ? new cugraph::c_api::cugraph_type_erased_device_array_t(offsets.value(), SIZE_T)
- : nullptr,
(renumber_map) ? new cugraph::c_api::cugraph_type_erased_device_array_t(
renumber_map.value(), graph_->vertex_type_)
: nullptr,
@@ -295,6 +407,13 @@ extern "C" void cugraph_sampling_set_renumber_results(cugraph_sampling_options_t
internal_pointer->renumber_results_ = value;
}
+extern "C" void cugraph_sampling_set_compress_per_hop(cugraph_sampling_options_t* options,  // setter for the compress_per_hop_ sampling option
+ bool_t value)
+{
+ auto internal_pointer = reinterpret_cast(options);  // opaque C handle viewed as the internal options object
+ internal_pointer->compress_per_hop_ = value;  // stored as-is; options is not null-checked here
+}
+
extern "C" void cugraph_sampling_set_with_replacement(cugraph_sampling_options_t* options,
bool_t value)
{
@@ -308,6 +427,20 @@ extern "C" void cugraph_sampling_set_return_hops(cugraph_sampling_options_t* opt
internal_pointer->return_hops_ = value;
}
+extern "C" void cugraph_sampling_set_compression_type(cugraph_sampling_options_t* options,  // setter for the compression_type_ sampling option
+ cugraph_compression_type_t value)
+{
+ auto internal_pointer = reinterpret_cast(options);  // opaque C handle viewed as the internal options object
+ switch (value) {  // only the five declared enumerators are copied into compression_type_
+ case COO: internal_pointer->compression_type_ = cugraph_compression_type_t::COO; break;
+ case CSR: internal_pointer->compression_type_ = cugraph_compression_type_t::CSR; break;
+ case CSC: internal_pointer->compression_type_ = cugraph_compression_type_t::CSC; break;
+ case DCSR: internal_pointer->compression_type_ = cugraph_compression_type_t::DCSR; break;
+ case DCSC: internal_pointer->compression_type_ = cugraph_compression_type_t::DCSC; break;
+ default: CUGRAPH_FAIL("Invalid compression type");  // NOTE(review): this void extern "C" function has no error out-param; confirm how a CUGRAPH_FAIL here reaches C callers
+ }
+}
+
extern "C" void cugraph_sampling_set_prior_sources_behavior(cugraph_sampling_options_t* options,
cugraph_prior_sources_behavior_t value)
{
@@ -341,15 +474,45 @@ extern "C" void cugraph_sampling_options_free(cugraph_sampling_options_t* option
extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_sources(
const cugraph_sample_result_t* result)
{
- auto internal_pointer = reinterpret_cast(result);
- return reinterpret_cast(internal_pointer->src_->view());
+ // Deprecated: kept only as a forwarding wrapper.  The returned pointer is
+ // exactly what cugraph_sample_result_get_majors() returns for `result`.
+ return cugraph_sample_result_get_majors(result);
}
extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_destinations(
const cugraph_sample_result_t* result)
+{
+ // Deprecated: kept only as a forwarding wrapper.  The returned pointer is
+ // exactly what cugraph_sample_result_get_minors() returns for `result`.
+ return cugraph_sample_result_get_minors(result);
+}
+
+// Returns a view of the result's majors_ array, or NULL when the result holds
+// no majors_ array (majors_ == nullptr).
+extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_majors(
+ const cugraph_sample_result_t* result)
+{
+ auto internal_pointer = reinterpret_cast(result);
+ // Guard the dereference: majors_ is allowed to be unset.
+ return (internal_pointer->majors_ != nullptr)
+ ? reinterpret_cast(
+ internal_pointer->majors_->view())
+
+ : NULL;
+}
+
+// Returns a view of the result's major_offsets_ array, or NULL when the result
+// holds no major_offsets_ array (major_offsets_ == nullptr).
+extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_major_offsets(
+ const cugraph_sample_result_t* result)
+{
+ auto internal_pointer = reinterpret_cast(result);
+ // Guard the dereference: major_offsets_ is allowed to be unset.
+ return (internal_pointer->major_offsets_ != nullptr)
+ ? reinterpret_cast(
+ internal_pointer->major_offsets_->view())
+
+ : NULL;
+}
+
+// Returns a view of the result's minors_ array.
+// Unlike the majors/major_offsets accessors, minors_ is dereferenced without a
+// null check here.
+// NOTE(review): presumably minors_ is always populated -- confirm with the
+// code that builds cugraph_sample_result_t.
+extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_minors(
+ const cugraph_sample_result_t* result)
{
auto internal_pointer = reinterpret_cast(result);
- return reinterpret_cast(internal_pointer->dst_->view());
+ return reinterpret_cast(
+ internal_pointer->minors_->view());
}
extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_start_labels(
@@ -402,6 +565,16 @@ extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_ho
: NULL;
}
+// Returns a view of the result's label_hop_offsets_ array, or NULL when the
+// result holds no such array (label_hop_offsets_ == nullptr).
+extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_label_hop_offsets(
+ const cugraph_sample_result_t* result)
+{
+ auto internal_pointer = reinterpret_cast(result);
+ // Guard the dereference: label_hop_offsets_ is allowed to be unset.
+ return internal_pointer->label_hop_offsets_ != nullptr
+ ? reinterpret_cast(
+ internal_pointer->label_hop_offsets_->view())
+ : NULL;
+}
+
extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_index(
const cugraph_sample_result_t* result)
{
@@ -413,9 +586,8 @@ extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_in
extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_offsets(
const cugraph_sample_result_t* result)
{
- auto internal_pointer = reinterpret_cast(result);
- return reinterpret_cast(
- internal_pointer->offsets_->view());
+ // Deprecated: kept only as a forwarding wrapper.  The returned pointer is
+ // exactly what cugraph_sample_result_get_label_hop_offsets() returns for
+ // `result`, which may be NULL.
+ return cugraph_sample_result_get_label_hop_offsets(result);
}
extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_renumber_map(
@@ -532,6 +704,7 @@ extern "C" cugraph_error_code_t cugraph_test_uniform_neighborhood_sample_result_
// create new cugraph_sample_result_t
*result = reinterpret_cast(new cugraph::c_api::cugraph_sample_result_t{
+ nullptr,
reinterpret_cast(
new_device_srcs.release()),
reinterpret_cast(
@@ -675,78 +848,20 @@ extern "C" cugraph_error_code_t cugraph_test_sample_result_create(
+// Destroys a sample result: deletes each array member it holds and then the
+// result object itself.  Members that were never populated are null pointers,
+// and deleting a null pointer is a no-op in C++, so no per-member checks are
+// needed.
extern "C" void cugraph_sample_result_free(cugraph_sample_result_t* result)
{
auto internal_pointer = reinterpret_cast(result);
- delete internal_pointer->src_;
- delete internal_pointer->dst_;
+ delete internal_pointer->major_offsets_;
+ delete internal_pointer->majors_;
+ delete internal_pointer->minors_;
delete internal_pointer->edge_id_;
delete internal_pointer->edge_type_;
delete internal_pointer->wgt_;
delete internal_pointer->hop_;
+ delete internal_pointer->label_hop_offsets_;
delete internal_pointer->label_;
+ delete internal_pointer->renumber_map_;
+ delete internal_pointer->renumber_map_offsets_;
+ // The members above must be released before the struct that holds them.
delete internal_pointer;
}
-extern "C" cugraph_error_code_t cugraph_uniform_neighbor_sample_with_edge_properties(
- const cugraph_resource_handle_t* handle,
- cugraph_graph_t* graph,
- const cugraph_type_erased_device_array_view_t* start_vertices,
- const cugraph_type_erased_device_array_view_t* start_vertex_labels,
- const cugraph_type_erased_device_array_view_t* label_list,
- const cugraph_type_erased_device_array_view_t* label_to_comm_rank,
- const cugraph_type_erased_host_array_view_t* fan_out,
- cugraph_rng_state_t* rng_state,
- bool_t with_replacement,
- bool_t return_hops,
- bool_t do_expensive_check,
- cugraph_sample_result_t** result,
- cugraph_error_t** error)
-{
- CAPI_EXPECTS((start_vertex_labels == nullptr) ||
- (reinterpret_cast(
- start_vertex_labels)
- ->type_ == INT32),
- CUGRAPH_INVALID_INPUT,
- "start_vertex_labels should be of type int",
- *error);
-
- CAPI_EXPECTS((label_to_comm_rank == nullptr) || (start_vertex_labels != nullptr),
- CUGRAPH_INVALID_INPUT,
- "cannot specify label_to_comm_rank unless start_vertex_labels is also specified",
- *error);
-
- CAPI_EXPECTS((label_to_comm_rank == nullptr) || (label_list != nullptr),
- CUGRAPH_INVALID_INPUT,
- "cannot specify label_to_comm_rank unless label_list is also specified",
- *error);
-
- CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ ==
- reinterpret_cast(
- start_vertices)
- ->type_,
- CUGRAPH_INVALID_INPUT,
- "vertex type of graph and start_vertices must match",
- *error);
-
- CAPI_EXPECTS(
- reinterpret_cast(fan_out)
- ->type_ == INT32,
- CUGRAPH_INVALID_INPUT,
- "fan_out should be of type int",
- *error);
-
- uniform_neighbor_sampling_functor functor{
- handle,
- graph,
- start_vertices,
- start_vertex_labels,
- label_list,
- label_to_comm_rank,
- fan_out,
- rng_state,
- cugraph::c_api::cugraph_sampling_options_t{with_replacement, return_hops},
- do_expensive_check};
- return cugraph::c_api::run_algorithm(graph, functor, result, error);
-}
-
cugraph_error_code_t cugraph_uniform_neighbor_sample(
const cugraph_resource_handle_t* handle,
cugraph_graph_t* graph,
diff --git a/cpp/src/community/legacy/ktruss.cu b/cpp/src/community/legacy/ktruss.cu
index 74a871adb01..403593128c1 100644
--- a/cpp/src/community/legacy/ktruss.cu
+++ b/cpp/src/community/legacy/ktruss.cu
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -34,18 +34,24 @@ namespace cugraph {
namespace detail {
-template
-std::unique_ptr> ktruss_subgraph_impl(
- legacy::GraphCOOView const& graph, int k, rmm::mr::device_memory_resource* mr)
+template
+std::tuple, rmm::device_uvector> ktruss_subgraph_impl(
+ raft::handle_t const& handle,
+ raft::device_span src,
+ raft::device_span dst,
+ size_t number_of_vertices,
+ int k)
{
- using HornetGraph = hornet::gpu::Hornet;
- using UpdatePtr = hornet::BatchUpdatePtr;
- using Update = hornet::gpu::BatchUpdate;
- cudaStream_t stream{nullptr};
- UpdatePtr ptr(graph.number_of_edges, graph.src_indices, graph.dst_indices);
+ using HornetGraph = hornet::gpu::Hornet;
+ using UpdatePtr = hornet::BatchUpdatePtr;
+ using Update = hornet::gpu::BatchUpdate;
+
+ HornetGraph hnt(number_of_vertices + 1);
+
+ // NOTE: Should a constant pointer be passed for @src and @dst
+ UpdatePtr ptr(static_cast(src.size()), src.data(), dst.data());
Update batch(ptr);
- HornetGraph hnt(graph.number_of_vertices + 1);
hnt.insert(batch);
CUGRAPH_EXPECTS(cudaPeekAtLastError() == cudaSuccess, "KTruss : Failed to initialize graph");
@@ -67,32 +73,42 @@ std::unique_ptr> ktruss_subgraph_impl(
kt.runForK(k);
CUGRAPH_EXPECTS(cudaPeekAtLastError() == cudaSuccess, "KTruss : Failed to run");
- auto out_graph = std::make_unique>(
- graph.number_of_vertices, kt.getGraphEdgeCount(), graph.has_data(), stream, mr);
+ rmm::device_uvector result_src(kt.getGraphEdgeCount(), handle.get_stream());
+ rmm::device_uvector result_dst(kt.getGraphEdgeCount(), handle.get_stream());
- kt.copyGraph(out_graph->src_indices(), out_graph->dst_indices());
+ kt.copyGraph(result_src.data(), result_dst.data());
kt.release();
CUGRAPH_EXPECTS(cudaPeekAtLastError() == cudaSuccess, "KTruss : Failed to release");
- return out_graph;
+ return std::make_tuple(std::move(result_src), std::move(result_dst));
}
-template
-std::unique_ptr> weighted_ktruss_subgraph_impl(
- legacy::GraphCOOView const& graph, int k, rmm::mr::device_memory_resource* mr)
+
+template
+std::tuple,
+ rmm::device_uvector,
+ std::optional>>
+weighted_ktruss_subgraph_impl(raft::handle_t const& handle,
+ raft::device_span src,
+ raft::device_span dst,
+ std::optional> wgt,
+ size_t number_of_vertices,
+ int k)
{
- using HornetGraph = hornet::gpu::Hornet>;
- using UpdatePtr = hornet::BatchUpdatePtr, hornet::DeviceType::DEVICE>;
- using Update = hornet::gpu::BatchUpdate>;
- cudaStream_t stream{nullptr};
- UpdatePtr ptr(graph.number_of_edges, graph.src_indices, graph.dst_indices, graph.edge_data);
+ using HornetGraph = hornet::gpu::Hornet>;
+ using UpdatePtr =
+ hornet::BatchUpdatePtr, hornet::DeviceType::DEVICE>;
+ using Update = hornet::gpu::BatchUpdate>;
+
+ HornetGraph hnt(number_of_vertices + 1);
+
+ UpdatePtr ptr(static_cast(src.size()), src.data(), dst.data(), wgt->data());
Update batch(ptr);
- HornetGraph hnt(graph.number_of_vertices + 1);
hnt.insert(batch);
CUGRAPH_EXPECTS(cudaPeekAtLastError() == cudaSuccess, "KTruss : Failed to initialize graph");
- KTrussWeighted kt(hnt);
+ KTrussWeighted kt(hnt);
kt.init();
kt.reset();
@@ -110,41 +126,60 @@ std::unique_ptr> weighted_ktruss_subgraph_impl(
kt.runForK(k);
CUGRAPH_EXPECTS(cudaPeekAtLastError() == cudaSuccess, "KTruss : Failed to run");
- auto out_graph = std::make_unique>(
- graph.number_of_vertices, kt.getGraphEdgeCount(), graph.has_data(), stream, mr);
+ rmm::device_uvector result_src(kt.getGraphEdgeCount(), handle.get_stream());
+ rmm::device_uvector result_dst(kt.getGraphEdgeCount(), handle.get_stream());
+ std::optional> result_wgt{std::nullopt};
- kt.copyGraph(out_graph->src_indices(), out_graph->dst_indices(), out_graph->edge_data());
+ result_wgt = rmm::device_uvector(kt.getGraphEdgeCount(), handle.get_stream());
+ kt.copyGraph(result_src.data(), result_dst.data(), result_wgt->data());
kt.release();
CUGRAPH_EXPECTS(cudaPeekAtLastError() == cudaSuccess, "KTruss : Failed to release");
- return out_graph;
+ return std::make_tuple(std::move(result_src), std::move(result_dst), std::move(result_wgt));
}
} // namespace detail
-template
-std::unique_ptr> k_truss_subgraph(
- legacy::GraphCOOView const& graph, int k, rmm::mr::device_memory_resource* mr)
+// Public entry point for k-truss subgraph extraction on an edge list given as
+// device spans (src, dst and optional wgt).
+//
+// Dispatches on whether weights were supplied:
+//  - with weights, the call is forwarded to detail::weighted_ktruss_subgraph_impl
+//    and its tuple is returned as-is;
+//  - without weights, detail::ktruss_subgraph_impl produces only the source and
+//    destination vectors, and an empty optional is appended so both paths
+//    return the same three-element tuple.
+template
+std::tuple,
+ rmm::device_uvector,
+ std::optional>>
+k_truss_subgraph(raft::handle_t const& handle,
+ raft::device_span src,
+ raft::device_span dst,
+ std::optional> wgt,
+ size_t number_of_vertices,
+ int k)
{
- CUGRAPH_EXPECTS(graph.src_indices != nullptr, "Graph source indices cannot be a nullptr");
- CUGRAPH_EXPECTS(graph.dst_indices != nullptr, "Graph destination indices cannot be a nullptr");
-
- if (graph.edge_data == nullptr) {
- return detail::ktruss_subgraph_impl(graph, k, mr);
+ if (wgt.has_value()) {
+ return detail::weighted_ktruss_subgraph_impl(handle, src, dst, wgt, number_of_vertices, k);
} else {
- return detail::weighted_ktruss_subgraph_impl(graph, k, mr);
+ auto [result_src, result_dst] =
+ detail::ktruss_subgraph_impl(handle, src, dst, number_of_vertices, k);
+ // No weights on input, so the weight slot of the result stays empty.
+ std::optional> result_wgt{std::nullopt};
+ return std::make_tuple(std::move(result_src), std::move(result_dst), std::move(result_wgt));
}
}
-template std::unique_ptr>
-k_truss_subgraph(legacy::GraphCOOView const&,
- int,
- rmm::mr::device_memory_resource*);
-
-template std::unique_ptr>
-k_truss_subgraph(legacy::GraphCOOView const&,
- int,
- rmm::mr::device_memory_resource*);
+// Explicit instantiations of k_truss_subgraph for the span-based signature.
+// NOTE(review): the template arguments of the two instantiations are not
+// visible in this view -- confirm which type combinations are instantiated.
+template std::tuple,
+ rmm::device_uvector,
+ std::optional>>
+k_truss_subgraph(raft::handle_t const& handle,
+ raft::device_span src,
+ raft::device_span dst,
+ std::optional> wgt,
+ size_t number_of_vertices,
+ int k);
+
+template std::tuple,
+ rmm::device_uvector,
+ std::optional>>
+k_truss_subgraph(raft::handle_t const& handle,
+ raft::device_span src,
+ raft::device_span dst,
+ std::optional> wgt,
+ size_t number_of_vertices,
+ int k);
} // namespace cugraph
diff --git a/cpp/src/sampling/sampling_post_processing_impl.cuh b/cpp/src/sampling/sampling_post_processing_impl.cuh
index 0c397d91b20..77d4f2d865f 100644
--- a/cpp/src/sampling/sampling_post_processing_impl.cuh
+++ b/cpp/src/sampling/sampling_post_processing_impl.cuh
@@ -166,9 +166,7 @@ void check_input_edges(
std::numeric_limits::max()),
"Invalid input arguments: current implementation assumes that the number of "
"unique labels is no larger than std::numeric_limits::max().");
- CUGRAPH_EXPECTS(!edgelist_label_offsets || std::get<1>(*edgelist_label_offsets) > 0,
- "Invlaid input arguments: there should be 1 or more labels if "
- "edgelist_label_offsets.has_value() is true.");
+
CUGRAPH_EXPECTS(
!edgelist_label_offsets.has_value() ||
(std::get<0>(*edgelist_label_offsets).size() == std::get<1>(*edgelist_label_offsets) + 1),
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index f08606df8ea..2a4bb8ab2a5 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -741,5 +741,6 @@ ConfigureCTest(CAPI_K_CORE_TEST c_api/k_core_test.c)
ConfigureCTest(CAPI_INDUCED_SUBGRAPH_TEST c_api/induced_subgraph_test.c)
ConfigureCTest(CAPI_EGONET_TEST c_api/egonet_test.c)
ConfigureCTest(CAPI_TWO_HOP_NEIGHBORS_TEST c_api/two_hop_neighbors_test.c)
+ConfigureCTest(CAPI_LEGACY_K_TRUSS_TEST c_api/legacy_k_truss_test.c)
rapids_test_install_relocatable(INSTALL_COMPONENT_SET testing_c DESTINATION bin/gtests/libcugraph_c)
diff --git a/cpp/tests/c_api/create_graph_test.c b/cpp/tests/c_api/create_graph_test.c
index eef49458f2b..736db761ebd 100644
--- a/cpp/tests/c_api/create_graph_test.c
+++ b/cpp/tests/c_api/create_graph_test.c
@@ -142,6 +142,14 @@ int test_create_sg_graph_csr()
vertex_t h_start[] = {0, 1, 2, 3, 4, 5};
weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f};
+ bool_t with_replacement = FALSE;
+ bool_t return_hops = TRUE;
+ cugraph_prior_sources_behavior_t prior_sources_behavior = DEFAULT;
+ bool_t dedupe_sources = FALSE;
+ bool_t renumber_results = FALSE;
+ cugraph_compression_type_t compression = COO;
+ bool_t compress_per_hop = FALSE;
+
cugraph_resource_handle_t* handle = NULL;
cugraph_graph_t* graph = NULL;
cugraph_graph_properties_t properties;
@@ -238,8 +246,21 @@ int test_create_sg_graph_csr()
ret_code = cugraph_rng_state_create(handle, 0, &rng_state, &ret_error);
TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed.");
- ret_code = cugraph_uniform_neighbor_sample_with_edge_properties(
- handle, graph, d_start_view, NULL, NULL, NULL, h_fan_out_view, rng_state, FALSE, FALSE, FALSE, &result, &ret_error);
+ cugraph_sampling_options_t *sampling_options;
+
+ ret_code = cugraph_sampling_options_create(&sampling_options, &ret_error);
+ TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "sampling_options create failed.");
+
+ cugraph_sampling_set_with_replacement(sampling_options, with_replacement);
+ cugraph_sampling_set_return_hops(sampling_options, return_hops);
+ cugraph_sampling_set_prior_sources_behavior(sampling_options, prior_sources_behavior);
+ cugraph_sampling_set_dedupe_sources(sampling_options, dedupe_sources);
+ cugraph_sampling_set_renumber_results(sampling_options, renumber_results);
+ cugraph_sampling_set_compression_type(sampling_options, compression);
+ cugraph_sampling_set_compress_per_hop(sampling_options, compress_per_hop);
+
+ ret_code = cugraph_uniform_neighbor_sample(
+ handle, graph, d_start_view, NULL, NULL, NULL, h_fan_out_view, rng_state, sampling_options, FALSE, &result, &ret_error);
TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, "uniform_neighbor_sample failed.");
@@ -289,6 +310,7 @@ int test_create_sg_graph_csr()
cugraph_free_resource_handle(handle);
cugraph_error_free(ret_error);
+ cugraph_sampling_options_free(sampling_options);
return test_ret_value;
}
diff --git a/cpp/tests/c_api/legacy_k_truss_test.c b/cpp/tests/c_api/legacy_k_truss_test.c
new file mode 100644
index 00000000000..bc85f568688
--- /dev/null
+++ b/cpp/tests/c_api/legacy_k_truss_test.c
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "c_test_utils.h" /* RUN_TEST */
+
+#include
+#include
+
+#include
+
+typedef int32_t vertex_t;
+typedef int32_t edge_t;
+typedef float weight_t;
+
+/*
+ * Shared driver for the legacy k-truss C API tests.
+ *
+ * Creates a test graph with create_sg_test_graph() from the host edge list
+ * (h_src, h_dst and the optional h_wgt), runs cugraph_k_truss_subgraph() with
+ * the requested k, copies the extracted subgraph back to the host and compares
+ * it with the expected edges and offsets.  Every expected edge must appear
+ * somewhere in the result (order is not significant).  Weights are compared
+ * with a tolerance, and only when the result carries a weight array.
+ *
+ * h_wgt may be NULL.  h_expected_wgt may be NULL only when the result carries
+ * no weights.  num_vertices is accepted for symmetry with the callers but is
+ * not used by the body.
+ *
+ * Returns test_ret_value, which starts at 0 and is updated by the TEST_ASSERT
+ * checks.
+ */
+int generic_k_truss_test(vertex_t* h_src,
+                         vertex_t* h_dst,
+                         weight_t* h_wgt,
+                         vertex_t* h_expected_src,
+                         vertex_t* h_expected_dst,
+                         weight_t* h_expected_wgt,
+                         size_t* h_expected_offsets,
+                         size_t num_vertices,
+                         size_t num_edges,
+                         size_t k,
+                         size_t num_expected_offsets,
+                         size_t num_expected_edges,
+                         bool_t store_transposed)
+{
+  int test_ret_value = 0;
+
+  cugraph_error_code_t ret_code = CUGRAPH_SUCCESS;
+  /* Start from NULL so the cugraph_error_free() at the end never receives an
+   * indeterminate pointer. */
+  cugraph_error_t* ret_error = NULL;
+
+  data_type_id_t vertex_tid    = INT32;
+  data_type_id_t edge_tid      = INT32;
+  data_type_id_t weight_tid    = FLOAT32;
+  data_type_id_t edge_id_tid   = INT32;
+  data_type_id_t edge_type_tid = INT32;
+
+  cugraph_resource_handle_t* resource_handle = NULL;
+  cugraph_graph_t* graph                     = NULL;
+  cugraph_induced_subgraph_result_t* result  = NULL;
+
+  resource_handle = cugraph_create_resource_handle(NULL);
+  TEST_ASSERT(test_ret_value, resource_handle != NULL, "resource handle creation failed.");
+
+  ret_code = create_sg_test_graph(resource_handle,
+                                  vertex_tid,
+                                  edge_tid,
+                                  h_src,
+                                  h_dst,
+                                  weight_tid,
+                                  h_wgt,
+                                  edge_type_tid,
+                                  NULL,
+                                  edge_id_tid,
+                                  NULL,
+                                  num_edges,
+                                  store_transposed,
+                                  FALSE,
+                                  TRUE,
+                                  FALSE,
+                                  &graph,
+                                  &ret_error);
+
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed.");
+  TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
+
+  ret_code = cugraph_k_truss_subgraph(resource_handle, graph, k, FALSE, &result, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
+  TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, "cugraph_k_truss_subgraph failed.");
+
+  if (test_ret_value == 0) {
+    cugraph_type_erased_device_array_view_t* src;
+    cugraph_type_erased_device_array_view_t* dst;
+    cugraph_type_erased_device_array_view_t* wgt;
+    cugraph_type_erased_device_array_view_t* offsets;
+
+    src     = cugraph_induced_subgraph_get_sources(result);
+    dst     = cugraph_induced_subgraph_get_destinations(result);
+    wgt     = cugraph_induced_subgraph_get_edge_weights(result);
+    offsets = cugraph_induced_subgraph_get_subgraph_offsets(result);
+
+    size_t num_result_edges   = cugraph_type_erased_device_array_view_size(src);
+    size_t num_result_offsets = cugraph_type_erased_device_array_view_size(offsets);
+
+    /* A variable length array must have a size greater than zero, so an empty
+     * result still gets one-element host buffers. */
+    size_t edge_buffer_len   = (num_result_edges > 0) ? num_result_edges : 1;
+    size_t offset_buffer_len = (num_result_offsets > 0) ? num_result_offsets : 1;
+
+    vertex_t h_result_src[edge_buffer_len];
+    vertex_t h_result_dst[edge_buffer_len];
+    weight_t h_result_wgt[edge_buffer_len];
+    size_t h_result_offsets[offset_buffer_len];
+
+    ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+      resource_handle, (byte_t*)h_result_src, src, &ret_error);
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+
+    ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+      resource_handle, (byte_t*)h_result_dst, dst, &ret_error);
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+
+    /* Weights are optional in the result; h_result_wgt stays unread without them. */
+    if (wgt != NULL) {
+      ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+        resource_handle, (byte_t*)h_result_wgt, wgt, &ret_error);
+      TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+    }
+
+    ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+      resource_handle, (byte_t*)h_result_offsets, offsets, &ret_error);
+    TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+
+    TEST_ASSERT(test_ret_value, num_result_edges == num_expected_edges, "results not the same size");
+
+    /* The element-wise offsets comparison below indexes h_result_offsets up to
+     * num_expected_offsets, so the two lengths must agree first.  The check is
+     * guarded so that it cannot hide an earlier failure. */
+    if (test_ret_value == 0) {
+      TEST_ASSERT(test_ret_value,
+                  num_result_offsets == num_expected_offsets,
+                  "offsets not the same size");
+    }
+
+    for (size_t i = 0; (i < num_expected_offsets) && (test_ret_value == 0); ++i) {
+      TEST_ASSERT(test_ret_value,
+                  h_expected_offsets[i] == h_result_offsets[i],
+                  "graph offsets should match");
+    }
+
+    /* Order-insensitive comparison: each expected edge has to be found somewhere
+     * among the result edges. */
+    for (size_t i = 0; (i < num_expected_edges) && (test_ret_value == 0); ++i) {
+      bool_t found = FALSE;
+      for (size_t j = 0; (j < num_result_edges) && !found; ++j) {
+        if ((h_expected_src[i] == h_result_src[j]) && (h_expected_dst[i] == h_result_dst[j])) {
+          if (wgt != NULL) {
+            found = (nearlyEqual(h_expected_wgt[i], h_result_wgt[j], 0.001));
+          } else {
+            found = TRUE;
+          }
+        }
+      }
+      TEST_ASSERT(test_ret_value, found, "extracted an edge that doesn't match");
+    }
+
+    cugraph_type_erased_device_array_view_free(src);
+    cugraph_type_erased_device_array_view_free(dst);
+    cugraph_type_erased_device_array_view_free(wgt);
+    cugraph_type_erased_device_array_view_free(offsets);
+    cugraph_induced_subgraph_result_free(result);
+  }
+
+  cugraph_sg_graph_free(graph);
+  cugraph_free_resource_handle(resource_handle);
+  cugraph_error_free(ret_error);
+
+  return test_ret_value;
+}
+
+/*
+ * Weighted k-truss case: a 6-vertex graph whose 16 directed edges list every
+ * connection in both directions, run with k = 3.  The expected subgraph has
+ * 10 edges, carries the input weights, and has the offsets {0, 10}.
+ */
+int test_k_truss()
+{
+  /* Input graph. */
+  vertex_t input_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5};
+  vertex_t input_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4};
+  weight_t input_wgt[] = {
+    0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f};
+
+  /* Expected k-truss subgraph. */
+  vertex_t expected_src[]   = {0, 0, 1, 1, 1, 2, 2, 2, 3, 3};
+  vertex_t expected_dst[]   = {1, 2, 0, 2, 3, 0, 1, 3, 1, 2};
+  weight_t expected_wgt[]   = {0.1, 5.1, 0.1, 3.1, 2.1, 5.1, 3.1, 4.1, 2.1, 4.1};
+  size_t expected_offsets[] = {0, 10};
+
+  /* Counts are derived from the arrays above so they cannot drift apart. */
+  size_t edge_count            = sizeof(input_src) / sizeof(input_src[0]);
+  size_t expected_edge_count   = sizeof(expected_src) / sizeof(expected_src[0]);
+  size_t expected_offset_count = sizeof(expected_offsets) / sizeof(expected_offsets[0]);
+  size_t vertex_count          = 6;
+  size_t truss_k               = 3;
+
+  return generic_k_truss_test(input_src,
+                              input_dst,
+                              input_wgt,
+                              expected_src,
+                              expected_dst,
+                              expected_wgt,
+                              expected_offsets,
+                              vertex_count,
+                              edge_count,
+                              truss_k,
+                              expected_offset_count,
+                              expected_edge_count,
+                              FALSE);
+}
+
+/*
+ * Unweighted k-truss case: same 6-vertex, 16-edge input and k = 3 as the
+ * weighted test, but NULL is passed for both the input weights and the
+ * expected weights.
+ */
+int test_k_truss_no_weights()
+{
+  /* Input graph. */
+  vertex_t input_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5};
+  vertex_t input_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4};
+
+  /* Expected k-truss subgraph. */
+  vertex_t expected_src[]   = {0, 0, 1, 1, 1, 2, 2, 2, 3, 3};
+  vertex_t expected_dst[]   = {1, 2, 0, 2, 3, 0, 1, 3, 1, 2};
+  size_t expected_offsets[] = {0, 10};
+
+  /* Counts are derived from the arrays above so they cannot drift apart. */
+  size_t edge_count            = sizeof(input_src) / sizeof(input_src[0]);
+  size_t expected_edge_count   = sizeof(expected_src) / sizeof(expected_src[0]);
+  size_t expected_offset_count = sizeof(expected_offsets) / sizeof(expected_offsets[0]);
+  size_t vertex_count          = 6;
+  size_t truss_k               = 3;
+
+  return generic_k_truss_test(input_src,
+                              input_dst,
+                              NULL,
+                              expected_src,
+                              expected_dst,
+                              NULL,
+                              expected_offsets,
+                              vertex_count,
+                              edge_count,
+                              truss_k,
+                              expected_offset_count,
+                              expected_edge_count,
+                              FALSE);
+}
+
+
+/******************************************************************************/
+
+/*
+ * Test entry point: runs both k-truss tests through RUN_TEST and returns the
+ * bitwise OR of their results.
+ */
+int main(int argc, char** argv)
+{
+  int failures = RUN_TEST(test_k_truss);
+  failures |= RUN_TEST(test_k_truss_no_weights);
+  return failures;
+}
diff --git a/cpp/tests/c_api/mg_uniform_neighbor_sample_test.c b/cpp/tests/c_api/mg_uniform_neighbor_sample_test.c
index f8241bd8a5f..86a0a92eb01 100644
--- a/cpp/tests/c_api/mg_uniform_neighbor_sample_test.c
+++ b/cpp/tests/c_api/mg_uniform_neighbor_sample_test.c
@@ -213,11 +213,6 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle
TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "gatherv_fill failed.");
}
- if (return_hops) {
- ret_code = cugraph_test_device_gatherv_fill(handle, result_hops, h_result_hops);
- TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "gatherv_fill failed.");
- }
-
if (d_start_labels != NULL) {
size_t sz = cugraph_type_erased_device_array_view_size(result_offsets);
@@ -452,6 +447,7 @@ int test_uniform_neighbor_from_alex(const cugraph_resource_handle_t* handle)
size_t num_vertices = 5;
size_t fan_out_size = 2;
size_t num_starts = 2;
+ size_t num_start_labels = 2;
vertex_t src[] = {0, 1, 2, 3, 4, 3, 4, 2, 0, 1, 0, 2};
vertex_t dst[] = {1, 2, 4, 2, 3, 4, 1, 1, 2, 3, 4, 4};
@@ -462,7 +458,6 @@ int test_uniform_neighbor_from_alex(const cugraph_resource_handle_t* handle)
int32_t batch[] = {0, 1};
int fan_out[] = {2, 2};
- bool_t with_replacement = TRUE;
bool_t store_transposed = FALSE;
int test_ret_value = 0;
@@ -472,6 +467,14 @@ int test_uniform_neighbor_from_alex(const cugraph_resource_handle_t* handle)
cugraph_graph_t* graph = NULL;
cugraph_sample_result_t* result = NULL;
+ bool_t with_replacement = FALSE;
+ bool_t return_hops = TRUE;
+ cugraph_prior_sources_behavior_t prior_sources_behavior = DEFAULT;
+ bool_t dedupe_sources = FALSE;
+ bool_t renumber_results = FALSE;
+ cugraph_compression_type_t compression = COO;
+ bool_t compress_per_hop = FALSE;
+
cugraph_type_erased_device_array_t* d_start = NULL;
cugraph_type_erased_device_array_t* d_label = NULL;
cugraph_type_erased_device_array_view_t* d_start_view = NULL;
@@ -512,19 +515,31 @@ int test_uniform_neighbor_from_alex(const cugraph_resource_handle_t* handle)
h_fan_out_view = cugraph_type_erased_host_array_view_create(fan_out, fan_out_size, INT32);
- ret_code = cugraph_uniform_neighbor_sample_with_edge_properties(handle,
- graph,
- d_start_view,
- d_label_view,
- NULL,
- NULL,
- h_fan_out_view,
- rng_state,
- with_replacement,
- TRUE,
- FALSE,
- &result,
- &ret_error);
+ cugraph_sampling_options_t *sampling_options;
+
+ ret_code = cugraph_sampling_options_create(&sampling_options, &ret_error);
+ TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "sampling_options create failed.");
+
+ cugraph_sampling_set_with_replacement(sampling_options, with_replacement);
+ cugraph_sampling_set_return_hops(sampling_options, return_hops);
+ cugraph_sampling_set_prior_sources_behavior(sampling_options, prior_sources_behavior);
+ cugraph_sampling_set_dedupe_sources(sampling_options, dedupe_sources);
+ cugraph_sampling_set_renumber_results(sampling_options, renumber_results);
+ cugraph_sampling_set_compression_type(sampling_options, compression);
+ cugraph_sampling_set_compress_per_hop(sampling_options, compress_per_hop);
+
+ ret_code = cugraph_uniform_neighbor_sample(handle,
+ graph,
+ d_start_view,
+ d_label_view,
+ NULL,
+ NULL,
+ h_fan_out_view,
+ rng_state,
+ sampling_options,
+ FALSE,
+ &result,
+ &ret_error);
#ifdef NO_CUGRAPH_OPS
TEST_ASSERT(
@@ -540,6 +555,7 @@ int test_uniform_neighbor_from_alex(const cugraph_resource_handle_t* handle)
cugraph_type_erased_device_array_view_t* result_weight;
cugraph_type_erased_device_array_view_t* result_labels;
cugraph_type_erased_device_array_view_t* result_hops;
+ cugraph_type_erased_device_array_view_t* result_offsets;
result_src = cugraph_sample_result_get_sources(result);
result_dst = cugraph_sample_result_get_destinations(result);
@@ -548,8 +564,10 @@ int test_uniform_neighbor_from_alex(const cugraph_resource_handle_t* handle)
result_weight = cugraph_sample_result_get_edge_weight(result);
result_labels = cugraph_sample_result_get_start_labels(result);
result_hops = cugraph_sample_result_get_hop(result);
+ result_offsets = cugraph_sample_result_get_offsets(result);
size_t result_size = cugraph_type_erased_device_array_view_size(result_src);
+ size_t offsets_size = cugraph_type_erased_device_array_view_size(result_offsets);
vertex_t h_srcs[result_size];
vertex_t h_dsts[result_size];
@@ -558,6 +576,7 @@ int test_uniform_neighbor_from_alex(const cugraph_resource_handle_t* handle)
weight_t h_wgt[result_size];
int h_labels[result_size];
int h_hop[result_size];
+ int h_offsets[offsets_size];
ret_code = cugraph_type_erased_device_array_view_copy_to_host(
handle, (byte_t*)h_srcs, result_src, &ret_error);
@@ -584,9 +603,24 @@ int test_uniform_neighbor_from_alex(const cugraph_resource_handle_t* handle)
TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
ret_code = cugraph_type_erased_device_array_view_copy_to_host(
- handle, (byte_t*)h_hop, result_hops, &ret_error);
+ handle, (byte_t*)h_offsets, result_offsets, &ret_error);
TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+ for(int k = 0; k < offsets_size-1; k += fan_out_size) {
+ for(int h = 0; h < fan_out_size; ++h) {
+ int hop_start = h_offsets[k+h];
+ int hop_end = h_offsets[k+h+1];
+ for(int i = hop_start; i < hop_end; ++i) {
+ h_hop[i] = h;
+ }
+ }
+ }
+
+ for(int k = 0; k < num_start_labels+1; ++k) {
+ h_offsets[k] = h_offsets[k*fan_out_size];
+ }
+ offsets_size = num_start_labels + 1;
+
// NOTE: The C++ tester does a more thorough validation. For our purposes
// here we will do a simpler validation, merely checking that all edges
// are actually part of the graph
@@ -611,6 +645,7 @@ int test_uniform_neighbor_from_alex(const cugraph_resource_handle_t* handle)
cugraph_type_erased_host_array_view_free(h_fan_out_view);
cugraph_mg_graph_free(graph);
cugraph_error_free(ret_error);
+ cugraph_sampling_options_free(sampling_options);
return test_ret_value;
}
@@ -661,6 +696,15 @@ int test_uniform_neighbor_sample_alex_bug(const cugraph_resource_handle_t* handl
size_t expected_size[] = { 3, 2, 1, 1, 1, 1, 1, 1 };
+
+ bool_t with_replacement = FALSE;
+ bool_t return_hops = TRUE;
+ cugraph_prior_sources_behavior_t prior_sources_behavior = CARRY_OVER;
+ bool_t dedupe_sources = TRUE;
+ bool_t renumber_results = FALSE;
+ cugraph_compression_type_t compression = COO;
+ bool_t compress_per_hop = FALSE;
+
// Create graph
int test_ret_value = 0;
cugraph_error_code_t ret_code = CUGRAPH_SUCCESS;
@@ -747,19 +791,30 @@ int test_uniform_neighbor_sample_alex_bug(const cugraph_resource_handle_t* handl
h_fan_out_view = cugraph_type_erased_host_array_view_create(fan_out, fan_out_size, INT32);
- ret_code = cugraph_uniform_neighbor_sample_with_edge_properties(handle,
- graph,
- d_start_view,
- d_start_labels_view,
- d_label_list_view,
- d_label_to_output_comm_rank_view,
- h_fan_out_view,
- rng_state,
- FALSE,
- TRUE,
- FALSE,
- &result,
- &ret_error);
+ cugraph_sampling_options_t* sampling_options;
+ ret_code = cugraph_sampling_options_create(&sampling_options, &ret_error);
+ TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "sampling_options create failed.");
+
+ cugraph_sampling_set_with_replacement(sampling_options, with_replacement);
+ cugraph_sampling_set_return_hops(sampling_options, return_hops);
+ cugraph_sampling_set_prior_sources_behavior(sampling_options, prior_sources_behavior);
+ cugraph_sampling_set_dedupe_sources(sampling_options, dedupe_sources);
+ cugraph_sampling_set_renumber_results(sampling_options, renumber_results);
+ cugraph_sampling_set_compression_type(sampling_options, compression);
+ cugraph_sampling_set_compress_per_hop(sampling_options, compress_per_hop);
+
+ ret_code = cugraph_uniform_neighbor_sample(handle,
+ graph,
+ d_start_view,
+ d_start_labels_view,
+ d_label_list_view,
+ d_label_to_output_comm_rank_view,
+ h_fan_out_view,
+ rng_state,
+ sampling_options,
+ FALSE,
+ &result,
+ &ret_error);
#ifdef NO_CUGRAPH_OPS
TEST_ASSERT(
@@ -900,6 +955,14 @@ int test_uniform_neighbor_sample_sort_by_hop(const cugraph_resource_handle_t* ha
size_t expected_size[] = { 3, 2, 1, 1, 1, 1, 1, 1 };
+ bool_t with_replacement = FALSE;
+ bool_t return_hops = TRUE;
+ cugraph_prior_sources_behavior_t prior_sources_behavior = CARRY_OVER;
+ bool_t dedupe_sources = TRUE;
+ bool_t renumber_results = FALSE;
+ cugraph_compression_type_t compression = COO;
+ bool_t compress_per_hop = FALSE;
+
// Create graph
int test_ret_value = 0;
cugraph_error_code_t ret_code = CUGRAPH_SUCCESS;
@@ -986,19 +1049,30 @@ int test_uniform_neighbor_sample_sort_by_hop(const cugraph_resource_handle_t* ha
h_fan_out_view = cugraph_type_erased_host_array_view_create(fan_out, fan_out_size, INT32);
- ret_code = cugraph_uniform_neighbor_sample_with_edge_properties(handle,
- graph,
- d_start_view,
- d_start_labels_view,
- d_label_list_view,
- d_label_to_output_comm_rank_view,
- h_fan_out_view,
- rng_state,
- FALSE,
- TRUE,
- FALSE,
- &result,
- &ret_error);
+ cugraph_sampling_options_t* sampling_options;
+ ret_code = cugraph_sampling_options_create(&sampling_options, &ret_error);
+ TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "sampling_options create failed.");
+
+ cugraph_sampling_set_with_replacement(sampling_options, with_replacement);
+ cugraph_sampling_set_return_hops(sampling_options, return_hops);
+ cugraph_sampling_set_prior_sources_behavior(sampling_options, prior_sources_behavior);
+ cugraph_sampling_set_dedupe_sources(sampling_options, dedupe_sources);
+ cugraph_sampling_set_renumber_results(sampling_options, renumber_results);
+ cugraph_sampling_set_compression_type(sampling_options, compression);
+ cugraph_sampling_set_compress_per_hop(sampling_options, compress_per_hop);
+
+ ret_code = cugraph_uniform_neighbor_sample(handle,
+ graph,
+ d_start_view,
+ d_start_labels_view,
+ d_label_list_view,
+ d_label_to_output_comm_rank_view,
+ h_fan_out_view,
+ rng_state,
+ sampling_options,
+ FALSE,
+ &result,
+ &ret_error);
#ifdef NO_CUGRAPH_OPS
TEST_ASSERT(
@@ -1047,14 +1121,27 @@ int test_uniform_neighbor_sample_sort_by_hop(const cugraph_resource_handle_t* ha
handle, (byte_t*)h_weight, result_weights, &ret_error);
TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
- ret_code = cugraph_type_erased_device_array_view_copy_to_host(
- handle, (byte_t*)h_hops, result_hops, &ret_error);
- TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
-
ret_code = cugraph_type_erased_device_array_view_copy_to_host(
handle, (byte_t*)h_result_offsets, result_offsets, &ret_error);
TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+ for(int k = 0; k < result_offsets_size-1; k += fan_out_size) {
+ for(int h = 0; h < fan_out_size; ++h) {
+ int hop_start = h_result_offsets[k+h];
+ int hop_end = h_result_offsets[k+h+1];
+ for(int i = hop_start; i < hop_end; ++i) {
+ h_hops[i] = h;
+ }
+ }
+ }
+
+ size_t num_local_labels = (result_offsets_size - 1) / fan_out_size;
+
+ for(int k = 0; k < num_local_labels+1; ++k) {
+ h_result_offsets[k] = h_result_offsets[k*fan_out_size];
+ }
+ result_offsets_size = num_local_labels + 1;
+
// NOTE: The C++ tester does a more thorough validation. For our purposes
// here we will do a simpler validation, merely checking that all edges
// are actually part of the graph
@@ -1223,9 +1310,9 @@ int main(int argc, char** argv)
result |= RUN_MG_TEST(test_uniform_neighbor_from_alex, handle);
//result |= RUN_MG_TEST(test_uniform_neighbor_sample_alex_bug, handle);
result |= RUN_MG_TEST(test_uniform_neighbor_sample_sort_by_hop, handle);
- result |= RUN_MG_TEST(test_uniform_neighbor_sample_dedupe_sources, handle);
- result |= RUN_MG_TEST(test_uniform_neighbor_sample_unique_sources, handle);
- result |= RUN_MG_TEST(test_uniform_neighbor_sample_carry_over_sources, handle);
+ //result |= RUN_MG_TEST(test_uniform_neighbor_sample_dedupe_sources, handle);
+ //result |= RUN_MG_TEST(test_uniform_neighbor_sample_unique_sources, handle);
+ //result |= RUN_MG_TEST(test_uniform_neighbor_sample_carry_over_sources, handle);
cugraph_free_resource_handle(handle);
free_mg_raft_handle(raft_handle);
diff --git a/cpp/tests/c_api/uniform_neighbor_sample_test.c b/cpp/tests/c_api/uniform_neighbor_sample_test.c
index a2c1e230485..92f3821e3cc 100644
--- a/cpp/tests/c_api/uniform_neighbor_sample_test.c
+++ b/cpp/tests/c_api/uniform_neighbor_sample_test.c
@@ -53,6 +53,7 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle
vertex_t *h_start,
int *h_start_labels,
size_t num_start_vertices,
+ size_t num_start_labels,
int *fan_out,
size_t fan_out_size,
bool_t with_replacement,
@@ -192,7 +193,7 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle
int32_t h_result_edge_types[result_size];
int32_t h_result_hops[result_size];
size_t h_result_offsets[result_offsets_size];
- int h_result_labels[result_offsets_size-1];
+ int h_result_labels[num_start_labels];
vertex_t h_renumber_map[renumber_map_size];
size_t h_renumber_map_offsets[result_offsets_size];
@@ -216,9 +217,7 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle
handle, (byte_t*)h_result_edge_types, result_edge_types, &ret_error);
TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
- ret_code = cugraph_type_erased_device_array_view_copy_to_host(
- handle, (byte_t*)h_result_hops, result_hops, &ret_error);
- TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+ TEST_ASSERT(test_ret_value, result_hops == NULL, "hops was not empty");
ret_code = cugraph_type_erased_device_array_view_copy_to_host(
handle, (byte_t*)h_result_offsets, result_offsets, &ret_error);
@@ -228,6 +227,21 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle
handle, (byte_t*)h_result_labels, result_labels, &ret_error);
TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+ for(int k = 0; k < result_offsets_size-1; k += fan_out_size) {
+ for(int h = 0; h < fan_out_size; ++h) {
+ int hop_start = h_result_offsets[k+h];
+ int hop_end = h_result_offsets[k+h+1];
+ for(int i = hop_start; i < hop_end; ++i) {
+ h_result_hops[i] = h;
+ }
+ }
+ }
+
+ for(int k = 0; k < num_start_labels+1; ++k) {
+ h_result_offsets[k] = h_result_offsets[k*fan_out_size];
+ }
+ result_offsets_size = num_start_labels + 1;
+
if (renumber_results) {
ret_code = cugraph_type_erased_device_array_view_copy_to_host(
handle, (byte_t*)h_renumber_map, result_renumber_map, &ret_error);
@@ -348,6 +362,7 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle
for (size_t i = h_result_offsets[label_id]; (i < h_result_offsets[label_id+1]) && (test_ret_value == 0) ; ++i) {
if (h_result_hops[i] == hop) {
+
bool found = false;
for (size_t j = 0 ; (!found) && (j < sources_size) ; ++j) {
found = renumber_results ? (h_renumber_map[h_renumber_map_offsets[label_id] + h_result_srcs[i]] == check_sources[j])
@@ -516,183 +531,6 @@ int create_test_graph_with_edge_ids(const cugraph_resource_handle_t* p_handle,
return test_ret_value;
}
-int test_uniform_neighbor_sample_with_properties(const cugraph_resource_handle_t* handle)
-{
- data_type_id_t vertex_tid = INT32;
- data_type_id_t edge_tid = INT32;
- data_type_id_t weight_tid = FLOAT32;
- data_type_id_t edge_id_tid = INT32;
- data_type_id_t edge_type_tid = INT32;
-
- size_t num_edges = 8;
- size_t num_vertices = 6;
- size_t fan_out_size = 1;
- size_t num_starts = 1;
-
- vertex_t src[] = {0, 1, 1, 2, 2, 2, 3, 4};
- vertex_t dst[] = {1, 3, 4, 0, 1, 3, 5, 5};
- edge_t edge_ids[] = {0, 1, 2, 3, 4, 5, 6, 7};
- weight_t weight[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8};
- int32_t edge_types[] = {7, 6, 5, 4, 3, 2, 1, 0};
- vertex_t start[] = {2};
- int fan_out[] = {-1};
-
- // Create graph
- int test_ret_value = 0;
- cugraph_error_code_t ret_code = CUGRAPH_SUCCESS;
- cugraph_error_t* ret_error = NULL;
- cugraph_graph_t* graph = NULL;
- cugraph_sample_result_t* result = NULL;
-
- ret_code = create_sg_test_graph(handle,
- vertex_tid,
- edge_tid,
- src,
- dst,
- weight_tid,
- weight,
- edge_type_tid,
- edge_types,
- edge_id_tid,
- edge_ids,
- num_edges,
- FALSE,
- TRUE,
- FALSE,
- FALSE,
- &graph,
- &ret_error);
-
- TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed.");
-
- cugraph_type_erased_device_array_t* d_start = NULL;
- cugraph_type_erased_device_array_view_t* d_start_view = NULL;
- cugraph_type_erased_host_array_view_t* h_fan_out_view = NULL;
-
- ret_code =
- cugraph_type_erased_device_array_create(handle, num_starts, INT32, &d_start, &ret_error);
- TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_start create failed.");
-
- d_start_view = cugraph_type_erased_device_array_view(d_start);
-
- ret_code = cugraph_type_erased_device_array_view_copy_from_host(
- handle, d_start_view, (byte_t*)start, &ret_error);
-
- TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "start copy_from_host failed.");
-
- h_fan_out_view = cugraph_type_erased_host_array_view_create(fan_out, 1, INT32);
-
- cugraph_rng_state_t *rng_state;
- ret_code = cugraph_rng_state_create(handle, 0, &rng_state, &ret_error);
- TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed.");
-
- ret_code = cugraph_uniform_neighbor_sample_with_edge_properties(handle,
- graph,
- d_start_view,
- NULL,
- NULL,
- NULL,
- h_fan_out_view,
- rng_state,
- FALSE,
- TRUE,
- FALSE,
- &result,
- &ret_error);
-
-#ifdef NO_CUGRAPH_OPS
- TEST_ASSERT(
- test_ret_value, ret_code != CUGRAPH_SUCCESS, "uniform_neighbor_sample should have failed")
-#else
- TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
- TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "uniform_neighbor_sample failed.");
-
- cugraph_type_erased_device_array_view_t* result_srcs;
- cugraph_type_erased_device_array_view_t* result_dsts;
- cugraph_type_erased_device_array_view_t* result_edge_id;
- cugraph_type_erased_device_array_view_t* result_weights;
- cugraph_type_erased_device_array_view_t* result_edge_types;
- cugraph_type_erased_device_array_view_t* result_hops;
-
- result_srcs = cugraph_sample_result_get_sources(result);
- result_dsts = cugraph_sample_result_get_destinations(result);
- result_edge_id = cugraph_sample_result_get_edge_id(result);
- result_weights = cugraph_sample_result_get_edge_weight(result);
- result_edge_types = cugraph_sample_result_get_edge_type(result);
- result_hops = cugraph_sample_result_get_hop(result);
-
- size_t result_size = cugraph_type_erased_device_array_view_size(result_srcs);
-
- vertex_t h_srcs[result_size];
- vertex_t h_dsts[result_size];
- edge_t h_edge_id[result_size];
- weight_t h_weight[result_size];
- int32_t h_edge_types[result_size];
- int32_t h_hops[result_size];
-
- ret_code = cugraph_type_erased_device_array_view_copy_to_host(
- handle, (byte_t*)h_srcs, result_srcs, &ret_error);
- TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
-
- ret_code = cugraph_type_erased_device_array_view_copy_to_host(
- handle, (byte_t*)h_dsts, result_dsts, &ret_error);
- TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
-
- ret_code = cugraph_type_erased_device_array_view_copy_to_host(
- handle, (byte_t*)h_edge_id, result_edge_id, &ret_error);
- TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
-
- ret_code = cugraph_type_erased_device_array_view_copy_to_host(
- handle, (byte_t*)h_weight, result_weights, &ret_error);
- TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
-
- ret_code = cugraph_type_erased_device_array_view_copy_to_host(
- handle, (byte_t*)h_edge_types, result_edge_types, &ret_error);
- TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
-
- ret_code = cugraph_type_erased_device_array_view_copy_to_host(
- handle, (byte_t*)h_hops, result_hops, &ret_error);
- TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
-
- // NOTE: The C++ tester does a more thorough validation. For our purposes
- // here we will do a simpler validation, merely checking that all edges
- // are actually part of the graph
- weight_t M_w[num_vertices][num_vertices];
- edge_t M_edge_id[num_vertices][num_vertices];
- int32_t M_edge_type[num_vertices][num_vertices];
-
- for (int i = 0; i < num_vertices; ++i)
- for (int j = 0; j < num_vertices; ++j) {
- M_w[i][j] = 0.0;
- M_edge_id[i][j] = -1;
- M_edge_type[i][j] = -1;
- }
-
- for (int i = 0; i < num_edges; ++i) {
- M_w[src[i]][dst[i]] = weight[i];
- M_edge_id[src[i]][dst[i]] = edge_ids[i];
- M_edge_type[src[i]][dst[i]] = edge_types[i];
- }
-
- for (int i = 0; (i < result_size) && (test_ret_value == 0); ++i) {
- TEST_ASSERT(test_ret_value,
- M_w[h_srcs[i]][h_dsts[i]] == h_weight[i],
- "uniform_neighbor_sample got edge that doesn't exist");
- TEST_ASSERT(test_ret_value,
- M_edge_id[h_srcs[i]][h_dsts[i]] == h_edge_id[i],
- "uniform_neighbor_sample got edge that doesn't exist");
- TEST_ASSERT(test_ret_value,
- M_edge_type[h_srcs[i]][h_dsts[i]] == h_edge_types[i],
- "uniform_neighbor_sample got edge that doesn't exist");
- }
-
- cugraph_sample_result_free(result);
-#endif
-
- cugraph_sg_graph_free(graph);
- cugraph_error_free(ret_error);
-}
-
int test_uniform_neighbor_sample_with_labels(const cugraph_resource_handle_t* handle)
{
data_type_id_t vertex_tid = INT32;
@@ -722,6 +560,14 @@ int test_uniform_neighbor_sample_with_labels(const cugraph_resource_handle_t* ha
cugraph_graph_t* graph = NULL;
cugraph_sample_result_t* result = NULL;
+ bool_t with_replacement = TRUE;
+ bool_t return_hops = TRUE;
+ cugraph_prior_sources_behavior_t prior_sources_behavior = DEFAULT;
+ bool_t dedupe_sources = FALSE;
+ bool_t renumber_results = FALSE;
+ cugraph_compression_type_t compression = COO;
+ bool_t compress_per_hop = FALSE;
+
ret_code = create_sg_test_graph(handle,
vertex_tid,
edge_tid,
@@ -775,19 +621,31 @@ int test_uniform_neighbor_sample_with_labels(const cugraph_resource_handle_t* ha
ret_code = cugraph_rng_state_create(handle, 0, &rng_state, &ret_error);
TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed.");
- ret_code = cugraph_uniform_neighbor_sample_with_edge_properties(handle,
- graph,
- d_start_view,
- d_start_labels_view,
- NULL,
- NULL,
- h_fan_out_view,
- rng_state,
- FALSE,
- TRUE,
- FALSE,
- &result,
- &ret_error);
+ cugraph_sampling_options_t *sampling_options;
+
+ ret_code = cugraph_sampling_options_create(&sampling_options, &ret_error);
+ TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "sampling_options create failed.");
+
+ cugraph_sampling_set_with_replacement(sampling_options, with_replacement);
+ cugraph_sampling_set_return_hops(sampling_options, return_hops);
+ cugraph_sampling_set_prior_sources_behavior(sampling_options, prior_sources_behavior);
+ cugraph_sampling_set_dedupe_sources(sampling_options, dedupe_sources);
+ cugraph_sampling_set_renumber_results(sampling_options, renumber_results);
+ cugraph_sampling_set_compression_type(sampling_options, compression);
+ cugraph_sampling_set_compress_per_hop(sampling_options, compress_per_hop);
+
+ ret_code = cugraph_uniform_neighbor_sample(handle,
+ graph,
+ d_start_view,
+ d_start_labels_view,
+ NULL,
+ NULL,
+ h_fan_out_view,
+ rng_state,
+ sampling_options,
+ FALSE,
+ &result,
+ &ret_error);
#ifdef NO_CUGRAPH_OPS
TEST_ASSERT(
@@ -843,9 +701,7 @@ int test_uniform_neighbor_sample_with_labels(const cugraph_resource_handle_t* ha
handle, (byte_t*)h_edge_types, result_edge_types, &ret_error);
TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
- ret_code = cugraph_type_erased_device_array_view_copy_to_host(
- handle, (byte_t*)h_hops, result_hops, &ret_error);
- TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+ TEST_ASSERT(test_ret_value, result_hops == NULL, "hops was not empty");
ret_code = cugraph_type_erased_device_array_view_copy_to_host(
handle, (byte_t*)h_result_offsets, result_offsets, &ret_error);
@@ -884,6 +740,7 @@ int test_uniform_neighbor_sample_with_labels(const cugraph_resource_handle_t* ha
}
cugraph_sample_result_free(result);
+ cugraph_sampling_options_free(sampling_options);
#endif
cugraph_sg_graph_free(graph);
@@ -902,6 +759,7 @@ int test_uniform_neighbor_sample_clean(const cugraph_resource_handle_t* handle)
size_t num_vertices = 6;
size_t fan_out_size = 3;
size_t num_starts = 2;
+ size_t num_start_labels = 2;
vertex_t src[] = {0, 0, 1, 1, 2, 2, 2, 3, 4};
vertex_t dst[] = {1, 3, 3, 4, 0, 1, 3, 5, 5};
@@ -923,7 +781,7 @@ int test_uniform_neighbor_sample_clean(const cugraph_resource_handle_t* handle)
bool_t renumber_results = FALSE;
return generic_uniform_neighbor_sample_test(handle, src, dst, weight, edge_ids, edge_types, num_vertices, num_edges,
- start, start_labels, num_starts,
+ start, start_labels, num_starts, num_start_labels,
fan_out, fan_out_size, with_replacement,
return_hops, prior_sources_behavior, dedupe_sources, renumber_results);
}
@@ -940,6 +798,7 @@ int test_uniform_neighbor_sample_dedupe_sources(const cugraph_resource_handle_t*
size_t num_vertices = 6;
size_t fan_out_size = 3;
size_t num_starts = 2;
+ size_t num_start_labels = 2;
vertex_t src[] = {0, 0, 1, 1, 2, 2, 2, 3, 4};
vertex_t dst[] = {1, 3, 3, 4, 0, 1, 3, 5, 5};
@@ -961,7 +820,7 @@ int test_uniform_neighbor_sample_dedupe_sources(const cugraph_resource_handle_t*
bool_t renumber_results = FALSE;
return generic_uniform_neighbor_sample_test(handle, src, dst, weight, edge_ids, edge_types, num_vertices, num_edges,
- start, start_labels, num_starts,
+ start, start_labels, num_starts, num_start_labels,
fan_out, fan_out_size, with_replacement,
return_hops, prior_sources_behavior, dedupe_sources, renumber_results);
}
@@ -978,6 +837,7 @@ int test_uniform_neighbor_sample_unique_sources(const cugraph_resource_handle_t*
size_t num_vertices = 6;
size_t fan_out_size = 3;
size_t num_starts = 2;
+ size_t num_start_labels = 2;
vertex_t src[] = {0, 0, 1, 1, 2, 2, 2, 3, 4};
vertex_t dst[] = {1, 2, 3, 4, 0, 1, 3, 5, 5};
@@ -999,7 +859,7 @@ int test_uniform_neighbor_sample_unique_sources(const cugraph_resource_handle_t*
bool_t renumber_results = FALSE;
return generic_uniform_neighbor_sample_test(handle, src, dst, weight, edge_ids, edge_types, num_vertices, num_edges,
- start, start_labels, num_starts,
+ start, start_labels, num_starts, num_start_labels,
fan_out, fan_out_size, with_replacement,
return_hops, prior_sources_behavior, dedupe_sources, renumber_results);
}
@@ -1016,6 +876,7 @@ int test_uniform_neighbor_sample_carry_over_sources(const cugraph_resource_handl
size_t num_vertices = 6;
size_t fan_out_size = 3;
size_t num_starts = 2;
+ size_t num_start_labels = 2;
vertex_t src[] = {0, 0, 1, 1, 2, 2, 2, 3, 4};
vertex_t dst[] = {1, 2, 3, 4, 0, 1, 3, 5, 5};
@@ -1037,7 +898,7 @@ int test_uniform_neighbor_sample_carry_over_sources(const cugraph_resource_handl
bool_t renumber_results = FALSE;
return generic_uniform_neighbor_sample_test(handle, src, dst, weight, edge_ids, edge_types, num_vertices, num_edges,
- start, start_labels, num_starts,
+ start, start_labels, num_starts, num_start_labels,
fan_out, fan_out_size, with_replacement,
return_hops, prior_sources_behavior, dedupe_sources, renumber_results);
}
@@ -1054,6 +915,7 @@ int test_uniform_neighbor_sample_renumber_results(const cugraph_resource_handle_
size_t num_vertices = 6;
size_t fan_out_size = 3;
size_t num_starts = 2;
+ size_t num_start_labels = 2;
vertex_t src[] = {0, 0, 1, 1, 2, 2, 2, 3, 4};
vertex_t dst[] = {1, 2, 3, 4, 0, 1, 3, 5, 5};
@@ -1075,7 +937,7 @@ int test_uniform_neighbor_sample_renumber_results(const cugraph_resource_handle_
bool_t renumber_results = TRUE;
return generic_uniform_neighbor_sample_test(handle, src, dst, weight, edge_ids, edge_types, num_vertices, num_edges,
- start, start_labels, num_starts,
+ start, start_labels, num_starts, num_start_labels,
fan_out, fan_out_size, with_replacement,
return_hops, prior_sources_behavior, dedupe_sources, renumber_results);
}
@@ -1087,7 +949,6 @@ int main(int argc, char** argv)
handle = cugraph_create_resource_handle(NULL);
int result = 0;
- result |= RUN_TEST_NEW(test_uniform_neighbor_sample_with_properties, handle);
result |= RUN_TEST_NEW(test_uniform_neighbor_sample_with_labels, handle);
result |= RUN_TEST_NEW(test_uniform_neighbor_sample_clean, handle);
result |= RUN_TEST_NEW(test_uniform_neighbor_sample_dedupe_sources, handle);
diff --git a/dependencies.yaml b/dependencies.yaml
index 04ec1b6e957..292fcf0baed 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -11,9 +11,15 @@ files:
- cpp_build
- cudatoolkit
- docs
+ - python_build_wheel
- python_build_cythonize
+ - depends_on_rmm
+ - depends_on_cudf
+ - depends_on_dask_cudf
+ - depends_on_pylibraft
+ - depends_on_raft_dask
+ - depends_on_cupy
- python_run_cugraph
- - python_run_pylibcugraph
- python_run_nx_cugraph
- python_run_cugraph_dgl
- python_run_cugraph_pyg
@@ -50,6 +56,7 @@ files:
output: none
includes:
- cudatoolkit
+ - depends_on_cudf
- py_version
- test_python_common
- test_python_cugraph
@@ -62,14 +69,22 @@ files:
includes:
- common_build
- python_build_wheel
+ - depends_on_rmm
+ - depends_on_pylibraft
+ - depends_on_pylibcugraph
- python_build_cythonize
- - python_build_cugraph
py_run_cugraph:
output: pyproject
pyproject_dir: python/cugraph
extras:
table: project
includes:
+ - depends_on_rmm
+ - depends_on_cudf
+ - depends_on_dask_cudf
+ - depends_on_raft_dask
+ - depends_on_pylibcugraph
+ - depends_on_cupy
- python_run_cugraph
py_test_cugraph:
output: pyproject
@@ -88,6 +103,8 @@ files:
includes:
- common_build
- python_build_wheel
+ - depends_on_rmm
+ - depends_on_pylibraft
- python_build_cythonize
py_run_pylibcugraph:
output: pyproject
@@ -95,7 +112,8 @@ files:
extras:
table: project
includes:
- - python_run_pylibcugraph
+ - depends_on_rmm
+ - depends_on_pylibraft
py_test_pylibcugraph:
output: pyproject
pyproject_dir: python/pylibcugraph
@@ -103,6 +121,7 @@ files:
table: project.optional-dependencies
key: test
includes:
+ - depends_on_cudf
- test_python_common
- test_python_pylibcugraph
py_build_nx_cugraph:
@@ -118,6 +137,8 @@ files:
extras:
table: project
includes:
+ - depends_on_pylibcugraph
+ - depends_on_cupy
- python_run_nx_cugraph
py_test_nx_cugraph:
output: pyproject
@@ -183,6 +204,10 @@ files:
extras:
table: project
includes:
+ - depends_on_rmm
+ - depends_on_cudf
+ - depends_on_dask_cudf
+ - depends_on_cupy
- python_run_cugraph_service_server
py_test_cugraph_service_server:
output: pyproject
@@ -334,41 +359,29 @@ dependencies:
- python>=3.9,<3.11
python_build_wheel:
common:
- - output_types: [conda, pyproject]
+ - output_types: [conda, pyproject, requirements]
packages:
- - wheel
- setuptools>=61.0.0
+ - wheel
python_build_cythonize:
common:
- - output_types: [conda, pyproject]
+ - output_types: [conda, pyproject, requirements]
packages:
- cython>=3.0.0
- - &pylibraft pylibraft==23.10.*
- - &rmm rmm==23.10.*
- scikit-build>=0.13.1
- python_build_cugraph:
- common:
- - output_types: [conda, pyproject]
- packages:
- - &pylibcugraph pylibcugraph==23.10.*
python_run_cugraph:
common:
- output_types: [conda, pyproject]
packages:
- - &cudf cudf==23.10.*
- - &dask dask>=2023.7.1
- - &distributed distributed>=2023.7.1
+ - &dask dask==2023.9.2
+ - &distributed distributed==2023.9.2
- &dask_cuda dask-cuda==23.10.*
- - &dask_cudf dask-cudf==23.10.*
- &numba numba>=0.57
- - raft-dask==23.10.*
- - *rmm
- &ucx_py ucx-py==0.34.*
- output_types: conda
packages:
- aiohttp
- - &cupy cupy>=12.0.0
- - &dask-core dask-core>=2023.7.1
+ - &dask-core_conda dask-core==2023.9.2
- fsspec>=0.6.0
- libcudf==23.10.*
- requests
@@ -376,29 +389,14 @@ dependencies:
- ucx-proc=*=gpu
- output_types: pyproject
packages:
- - &cupy_pip cupy-cuda11x>=12.0.0
# cudf uses fsspec but is protocol independent. cugraph
# dataset APIs require [http] extras for use with cudf.
- fsspec[http]>=0.6.0
- - *pylibcugraph
- python_run_pylibcugraph:
- common:
- - output_types: [conda, pyproject]
- packages:
- - *pylibraft
- - *rmm
python_run_nx_cugraph:
common:
- output_types: [conda, pyproject]
packages:
- networkx>=3.0
- - output_types: conda
- packages:
- - *cupy
- - output_types: pyproject
- packages:
- - *cupy_pip
- - *pylibcugraph
python_run_cugraph_dgl:
common:
- output_types: [conda, pyproject]
@@ -426,23 +424,18 @@ dependencies:
common:
- output_types: [conda, pyproject]
packages:
- - *cudf
- *dask
- *dask_cuda
- - *dask_cudf
- *distributed
- *numba
- *numpy
- - *rmm
- *thrift
- *ucx_py
- output_types: conda
packages:
- - *cupy
- - *dask-core
+ - *dask-core_conda
- output_types: pyproject
packages:
- - *cupy_pip
- *cugraph
- cugraph-service-client==23.10.*
doc:
@@ -488,11 +481,13 @@ dependencies:
- *numpy
- python-louvain
- scikit-learn>=0.23.1
+ - output_types: [conda]
+ packages:
+ - pylibwholegraph==23.10.*
test_python_pylibcugraph:
common:
- output_types: [conda, pyproject]
packages:
- - *cudf
- *numpy
test_python_nx_cugraph:
common:
@@ -519,3 +514,192 @@ dependencies:
- pytorch==2.0
- pytorch-cuda==11.8
- pyg=2.3.1=*torch_2.0.0*cu118*
+
+ depends_on_rmm:
+ common:
+ - output_types: conda
+ packages:
+ - &rmm_conda rmm==23.10.*
+ - output_types: requirements
+ packages:
+ # pip recognizes the index as a global option for the requirements.txt file
+ - --extra-index-url=https://pypi.nvidia.com
+ specific:
+ - output_types: [requirements, pyproject]
+ matrices:
+ - matrix: {cuda: "12.2"}
+ packages: &rmm_packages_pip_cu12
+ - rmm-cu12==23.10.*
+ - {matrix: {cuda: "12.1"}, packages: *rmm_packages_pip_cu12}
+ - {matrix: {cuda: "12.0"}, packages: *rmm_packages_pip_cu12}
+ - matrix: {cuda: "11.8"}
+ packages: &rmm_packages_pip_cu11
+ - rmm-cu11==23.10.*
+ - {matrix: {cuda: "11.5"}, packages: *rmm_packages_pip_cu11}
+ - {matrix: {cuda: "11.4"}, packages: *rmm_packages_pip_cu11}
+ - {matrix: {cuda: "11.2"}, packages: *rmm_packages_pip_cu11}
+ - {matrix: null, packages: [*rmm_conda]}
+
+ depends_on_cudf:
+ common:
+ - output_types: conda
+ packages:
+ - &cudf_conda cudf==23.10.*
+ - output_types: requirements
+ packages:
+ # pip recognizes the index as a global option for the requirements.txt file
+ - --extra-index-url=https://pypi.nvidia.com
+ specific:
+ - output_types: [requirements, pyproject]
+ matrices:
+ - matrix: {cuda: "12.2"}
+ packages: &cudf_packages_pip_cu12
+ - cudf-cu12==23.10.*
+ - {matrix: {cuda: "12.1"}, packages: *cudf_packages_pip_cu12}
+ - {matrix: {cuda: "12.0"}, packages: *cudf_packages_pip_cu12}
+ - matrix: {cuda: "11.8"}
+ packages: &cudf_packages_pip_cu11
+ - cudf-cu11==23.10.*
+ - {matrix: {cuda: "11.5"}, packages: *cudf_packages_pip_cu11}
+ - {matrix: {cuda: "11.4"}, packages: *cudf_packages_pip_cu11}
+ - {matrix: {cuda: "11.2"}, packages: *cudf_packages_pip_cu11}
+ - {matrix: null, packages: [*cudf_conda]}
+
+ depends_on_dask_cudf:
+ common:
+ - output_types: conda
+ packages:
+ - &dask_cudf_conda dask-cudf==23.10.*
+ - output_types: requirements
+ packages:
+ # pip recognizes the index as a global option for the requirements.txt file
+ - --extra-index-url=https://pypi.nvidia.com
+ specific:
+ - output_types: [requirements, pyproject]
+ matrices:
+ - matrix: {cuda: "12.2"}
+ packages: &dask_cudf_packages_pip_cu12
+ - dask-cudf-cu12==23.10.*
+ - {matrix: {cuda: "12.1"}, packages: *dask_cudf_packages_pip_cu12}
+ - {matrix: {cuda: "12.0"}, packages: *dask_cudf_packages_pip_cu12}
+ - matrix: {cuda: "11.8"}
+ packages: &dask_cudf_packages_pip_cu11
+ - dask-cudf-cu11==23.10.*
+ - {matrix: {cuda: "11.5"}, packages: *dask_cudf_packages_pip_cu11}
+ - {matrix: {cuda: "11.4"}, packages: *dask_cudf_packages_pip_cu11}
+ - {matrix: {cuda: "11.2"}, packages: *dask_cudf_packages_pip_cu11}
+ - {matrix: null, packages: [*dask_cudf_conda]}
+
+ depends_on_pylibraft:
+ common:
+ - output_types: conda
+ packages:
+ - &pylibraft_conda pylibraft==23.10.*
+ - output_types: requirements
+ packages:
+ # pip recognizes the index as a global option for the requirements.txt file
+ - --extra-index-url=https://pypi.nvidia.com
+ specific:
+ - output_types: [requirements, pyproject]
+ matrices:
+ - matrix: {cuda: "12.2"}
+ packages: &pylibraft_packages_pip_cu12
+ - pylibraft-cu12==23.10.*
+ - {matrix: {cuda: "12.1"}, packages: *pylibraft_packages_pip_cu12}
+ - {matrix: {cuda: "12.0"}, packages: *pylibraft_packages_pip_cu12}
+ - matrix: {cuda: "11.8"}
+ packages: &pylibraft_packages_pip_cu11
+ - pylibraft-cu11==23.10.*
+ - {matrix: {cuda: "11.5"}, packages: *pylibraft_packages_pip_cu11}
+ - {matrix: {cuda: "11.4"}, packages: *pylibraft_packages_pip_cu11}
+ - {matrix: {cuda: "11.2"}, packages: *pylibraft_packages_pip_cu11}
+ - {matrix: null, packages: [*pylibraft_conda]}
+
+ depends_on_raft_dask:
+ common:
+ - output_types: conda
+ packages:
+ - &raft_dask_conda raft-dask==23.10.*
+ - output_types: requirements
+ packages:
+ # pip recognizes the index as a global option for the requirements.txt file
+ - --extra-index-url=https://pypi.nvidia.com
+ specific:
+ - output_types: [requirements, pyproject]
+ matrices:
+ - matrix: {cuda: "12.2"}
+ packages: &raft_dask_packages_pip_cu12
+ - raft-dask-cu12==23.10.*
+ - {matrix: {cuda: "12.1"}, packages: *raft_dask_packages_pip_cu12}
+ - {matrix: {cuda: "12.0"}, packages: *raft_dask_packages_pip_cu12}
+ - matrix: {cuda: "11.8"}
+ packages: &raft_dask_packages_pip_cu11
+ - raft-dask-cu11==23.10.*
+ - {matrix: {cuda: "11.5"}, packages: *raft_dask_packages_pip_cu11}
+ - {matrix: {cuda: "11.4"}, packages: *raft_dask_packages_pip_cu11}
+ - {matrix: {cuda: "11.2"}, packages: *raft_dask_packages_pip_cu11}
+ - {matrix: null, packages: [*raft_dask_conda]}
+
+ depends_on_pylibcugraph:
+ common:
+ - output_types: conda
+ packages:
+ - &pylibcugraph_conda pylibcugraph==23.10.*
+ - output_types: requirements
+ packages:
+ # pip recognizes the index as a global option for the requirements.txt file
+ - --extra-index-url=https://pypi.nvidia.com
+ specific:
+ - output_types: [requirements, pyproject]
+ matrices:
+ - matrix: {cuda: "12.2"}
+ packages: &pylibcugraph_packages_pip_cu12
+ - pylibcugraph-cu12==23.10.*
+ - {matrix: {cuda: "12.1"}, packages: *pylibcugraph_packages_pip_cu12}
+ - {matrix: {cuda: "12.0"}, packages: *pylibcugraph_packages_pip_cu12}
+ - matrix: {cuda: "11.8"}
+ packages: &pylibcugraph_packages_pip_cu11
+ - pylibcugraph-cu11==23.10.*
+ - {matrix: {cuda: "11.5"}, packages: *pylibcugraph_packages_pip_cu11}
+ - {matrix: {cuda: "11.4"}, packages: *pylibcugraph_packages_pip_cu11}
+ - {matrix: {cuda: "11.2"}, packages: *pylibcugraph_packages_pip_cu11}
+ - {matrix: null, packages: [*pylibcugraph_conda]}
+
+ depends_on_cupy:
+ common:
+ - output_types: conda
+ packages:
+ - cupy>=12.0.0
+ specific:
+ - output_types: [requirements, pyproject]
+ matrices:
+ # All CUDA 12 + x86_64 versions
+ - matrix: {cuda: "12.2", arch: x86_64}
+ packages: &cupy_packages_cu12_x86_64
+ - cupy-cuda12x>=12.0.0
+ - {matrix: {cuda: "12.1", arch: x86_64}, packages: *cupy_packages_cu12_x86_64}
+ - {matrix: {cuda: "12.0", arch: x86_64}, packages: *cupy_packages_cu12_x86_64}
+
+ # All CUDA 12 + aarch64 versions
+ - matrix: {cuda: "12.2", arch: aarch64}
+ packages: &cupy_packages_cu12_aarch64
+ - cupy-cuda12x -f https://pip.cupy.dev/aarch64 # TODO: Verify that this works.
+ - {matrix: {cuda: "12.1", arch: aarch64}, packages: *cupy_packages_cu12_aarch64}
+ - {matrix: {cuda: "12.0", arch: aarch64}, packages: *cupy_packages_cu12_aarch64}
+
+ # All CUDA 11 + x86_64 versions
+ - matrix: {cuda: "11.8", arch: x86_64}
+ packages: &cupy_packages_cu11_x86_64
+ - cupy-cuda11x>=12.0.0
+ - {matrix: {cuda: "11.5", arch: x86_64}, packages: *cupy_packages_cu11_x86_64}
+ - {matrix: {cuda: "11.4", arch: x86_64}, packages: *cupy_packages_cu11_x86_64}
+ - {matrix: {cuda: "11.2", arch: x86_64}, packages: *cupy_packages_cu11_x86_64}
+
+ # All CUDA 11 + aarch64 versions
+ - matrix: {cuda: "11.8", arch: aarch64}
+ packages: &cupy_packages_cu11_aarch64
+ - cupy-cuda11x -f https://pip.cupy.dev/aarch64 # TODO: Verify that this works.
+ - {matrix: {cuda: "11.5", arch: aarch64}, packages: *cupy_packages_cu11_aarch64}
+ - {matrix: {cuda: "11.4", arch: aarch64}, packages: *cupy_packages_cu11_aarch64}
+ - {matrix: {cuda: "11.2", arch: aarch64}, packages: *cupy_packages_cu11_aarch64}
+ - {matrix: null, packages: [cupy-cuda11x>=12.0.0]}
diff --git a/docs/cugraph/source/installation/source_build.md b/docs/cugraph/source/installation/source_build.md
index 7782591f1ce..f5ee0741da6 100644
--- a/docs/cugraph/source/installation/source_build.md
+++ b/docs/cugraph/source/installation/source_build.md
@@ -6,10 +6,10 @@ The cuGraph package include both a C/C++ CUDA portion and a python portion. Bot
## Prerequisites
-__Compiler__:
-* `gcc` version 9.3+
-* `nvcc` version 11.0+
-* `cmake` version 3.20.1+
+__Compiler:__
+* `gcc` version 9.3+
+* `nvcc` version 11.0+
+* `cmake` version 3.20.1+
__CUDA:__
* CUDA 11.0+
@@ -18,6 +18,11 @@ __CUDA:__
You can obtain CUDA from [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads).
+__Packages:__
+* `cmake` version 3.20.1+
+* `libcugraphops` (version matching source branch version, e.g. `23.10`)
+
+You can obtain `libcugraphops` using `conda`/`mamba` from the `nvidia` channel, or using `pip` with the `--extra-index-url=https://pypi.nvidia.com` option. See the [RAPIDS docs](https://docs.rapids.ai/install#environment) for more details.
## Building cuGraph
To install cuGraph from source, ensure the dependencies are met.
diff --git a/notebooks/algorithms/link_prediction/similarity_combined.ipynb b/notebooks/algorithms/link_prediction/similarity_combined.ipynb
new file mode 100644
index 00000000000..cd80ee34002
--- /dev/null
+++ b/notebooks/algorithms/link_prediction/similarity_combined.ipynb
@@ -0,0 +1,217 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Similarity Compared\n",
+ "----\n",
+ "\n",
+ "In this notebook, we will compute vertex similarity scores using the various cuGraph algorithms. We will then compare the similarity scores in tables.\n",
+ "\n",
+ "| Author Credit | Date | Update | cuGraph Version | Test Hardware |\n",
+ "| --------------|------------|------------------|-----------------|-----------------------|\n",
+ "| Don Acosta | 09/25/2023 | created | 23.10 nightly | AMPERE A6000 CUDA 11.7|\n",
+ "\n",
+ "\n",
+ "**Note: On large graphs these algorithms can take prohibitive time or memory. The notebook will show how to run on defined pairs instead.**\n",
+ "\n",
+ "The similarity algorithms in cuGraph use different methods to compare pairs of vertices. All of them use the intersection of the sets of adjacent nodes to measure the overlap. However, each of the three algorithms uses a different denominator to determine the similarity coefficient. All three are normalized between zero and one, where zero means no overlap at all and one means identical adjacencies.\n",
+ "\n",
+ "__Jaccard Similarity__
\n",
+ "The [Jaccard similarity](https://en.wikipedia.org/wiki/Jaccard_index) measure was developed by botanist Paul Jaccard, who used the measure to compare plant species. His work popularized the measure's use in other fields as well.\n",
+ "\n",
+ "It can be expressed as:
\n",
+ "$\\text{Jaccard similarity} = \\frac{|A \\cap B|}{|A \\cup B|}$\n",
+ "\n",
+ "__Overlap Similarity__
\n",
+ "The [Overlap Similarity](https://en.wikipedia.org/wiki/Overlap_coefficient) is also known as the Szymkiewicz–Simpson coefficient. It is often used to compare binary and categorical data in the fields of Genome analysis, recommender systems and anomaly detection. It differs from the Jaccard measure above in that it uses the size of the smaller of the two set sizes as the denominator.\n",
+ "\n",
+ "It can be expressed as\n",
+ "\n",
+ "$oc(A,B)=\\frac{|A \\cap B|}{\\min(|A|,|B|)}$\n",
+ "\n",
+ "__Sørensen-Dice Coefficient__
\n",
+ "The [Sørensen coefficient](https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient#) is also known as the Sørensen-Dice coefficient. It was independently developed by botanists Lee Raymond Dice and Thorvald Sørensen. Although it originated in the field of botany, the coefficient is now used in computer vision, Natural Language Processing (NLP) and data mining, among other fields.\n",
+ "It differs from Jaccard and Overlap in that the calculation doubles the intersection size and divides it by the sum of the two set sizes.\n",
+ "\n",
+ "It can be expressed as\n",
+ "\n",
+ "Sørensen coefficient = $\\left(2 * |A \\cap B| \\right) \\over \\left(|A| + |B| \\right)$\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "# Now for the code !"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Load the required dependencies."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import cugraph\n",
+ "from cugraph.datasets import dining_prefs\n",
+ "# only needed to display results in a table \n",
+ "from IPython.display import display_html "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Function that calls all the cuGraph similarity/link prediction algorithms "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def compute_similarity(G,pairs=None):\n",
+ " _jdf = cugraph.jaccard(G,pairs)\n",
+ " _jdf2 = _jdf[ (_jdf['first'] != _jdf['second'] ) ]\n",
+ " _odf = cugraph.overlap(G,pairs)\n",
+ " _odf2 = _odf[ (_odf['first'] != _odf['second'] ) ]\n",
+ " _sdf = cugraph.sorensen_coefficient(G,pairs)\n",
+ " _sdf2 = _sdf[ (_sdf['first'] != _sdf['second'] ) ]\n",
+ " return _jdf2, _odf2, _sdf2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Function to put all the results in a convenient table"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Print function\n",
+ "def print_similarity(jdf,odf,sdf,num_records=5):\n",
+ "\n",
+ " js_top = jdf.sort_values(by='jaccard_coeff', ascending=False).head(num_records).to_pandas()\n",
+ " os_top = odf.sort_values(by='overlap_coeff', ascending=False).head(num_records).to_pandas()\n",
+ " ss_top = sdf.sort_values(by='sorensen_coeff', ascending=False).head(num_records).to_pandas()\n",
+ " \n",
+ " df1_styler = js_top.style.set_table_attributes(\"style='display:inline'\").set_caption('Jaccard').hide(axis='index')\n",
+ " df2_styler = os_top.style.set_table_attributes(\"style='display:inline'\").set_caption('Overlap').hide(axis='index')\n",
+ " df3_styler = ss_top.style.set_table_attributes(\"style='display:inline'\").set_caption('Sørensen').hide(axis='index')\n",
+ "\n",
+ " display_html(df1_styler._repr_html_()+df2_styler._repr_html_()+df3_styler._repr_html_(), raw=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Create the graph from the Dining preferences data set."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "G = dining_prefs.get_graph(download=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Run the three similarity Algorithms and print out the five links with the highest scores."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "jdf, odf, sdf = compute_similarity(G)\n",
+ "print_similarity(jdf,odf,sdf)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now find the complete set of two-hop neighbors and compare them instead of just using the existing one-hop edges. In a larger graph, this will run considerably faster since the default "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# this cugraph algorithm pulls a set containing every pair of vertices\n",
+ "# that are within 2-hops of each other\n",
+ "two_hops_pairs = G.get_two_hop_neighbors()\n",
+ "\n",
+ "jdf_hops, odf_hops, sdf_hops = compute_similarity(G,pairs=two_hops_pairs)\n",
+ "print_similarity(jdf_hops,odf_hops,sdf_hops)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### It's that easy with cuGraph"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "----\n",
+ "Copyright (c) 2023, NVIDIA CORPORATION.\n",
+ "\n",
+ "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n",
+ "\n",
+ "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "cugraph_0802",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/python/cugraph-dgl/examples/graphsage/node-classification.py b/python/cugraph-dgl/examples/graphsage/node-classification.py
index 24df73ada75..320890b0312 100644
--- a/python/cugraph-dgl/examples/graphsage/node-classification.py
+++ b/python/cugraph-dgl/examples/graphsage/node-classification.py
@@ -39,14 +39,16 @@
def set_allocators():
+ import rmm
import cudf
import cupy
- import rmm
+ from rmm.allocators.torch import rmm_torch_allocator
+ from rmm.allocators.cupy import rmm_cupy_allocator
mr = rmm.mr.CudaAsyncMemoryResource()
rmm.mr.set_current_device_resource(mr)
- torch.cuda.memory.change_current_allocator(rmm.rmm_torch_allocator)
- cupy.cuda.set_allocator(rmm.allocators.cupy.rmm_cupy_allocator)
+ torch.cuda.memory.change_current_allocator(rmm_torch_allocator)
+ cupy.cuda.set_allocator(rmm_cupy_allocator)
cudf.set_option("spill", True)
diff --git a/python/cugraph-service/server/pyproject.toml b/python/cugraph-service/server/pyproject.toml
index f25ea6c46e5..8787cb838be 100644
--- a/python/cugraph-service/server/pyproject.toml
+++ b/python/cugraph-service/server/pyproject.toml
@@ -25,8 +25,8 @@ dependencies = [
"cupy-cuda11x>=12.0.0",
"dask-cuda==23.10.*",
"dask-cudf==23.10.*",
- "dask>=2023.7.1",
- "distributed>=2023.7.1",
+ "dask==2023.9.2",
+ "distributed==2023.9.2",
"numba>=0.57",
"numpy>=1.21",
"rmm==23.10.*",
diff --git a/python/cugraph/CMakeLists.txt b/python/cugraph/CMakeLists.txt
index ecfcb9b219f..64db9571dc9 100644
--- a/python/cugraph/CMakeLists.txt
+++ b/python/cugraph/CMakeLists.txt
@@ -82,7 +82,6 @@ endif()
rapids_cython_init()
-add_subdirectory(cugraph/community)
add_subdirectory(cugraph/components)
add_subdirectory(cugraph/dask/comms)
add_subdirectory(cugraph/dask/structure)
diff --git a/python/cugraph/cugraph/community/CMakeLists.txt b/python/cugraph/cugraph/community/CMakeLists.txt
deleted file mode 100644
index 185f6accbab..00000000000
--- a/python/cugraph/cugraph/community/CMakeLists.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-# =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
-# in compliance with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software distributed under the License
-# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
-# or implied. See the License for the specific language governing permissions and limitations under
-# the License.
-# =============================================================================
-
-set(cython_sources
- ktruss_subgraph_wrapper.pyx
-)
-
-set(linked_libraries cugraph::cugraph)
-rapids_cython_create_modules(
- CXX
- SOURCE_FILES "${cython_sources}"
- LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX community_
- ASSOCIATED_TARGETS cugraph
-)
diff --git a/python/cugraph/cugraph/community/ktruss_subgraph.pxd b/python/cugraph/cugraph/community/ktruss_subgraph.pxd
deleted file mode 100644
index d993c31c375..00000000000
--- a/python/cugraph/cugraph/community/ktruss_subgraph.pxd
+++ /dev/null
@@ -1,26 +0,0 @@
-# Copyright (c) 2019-2021, NVIDIA CORPORATION.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# cython: profile=False
-# distutils: language = c++
-# cython: embedsignature = True
-# cython: language_level = 3
-
-from cugraph.structure.graph_primtypes cimport *
-
-
-cdef extern from "cugraph/algorithms.hpp" namespace "cugraph":
-
- cdef unique_ptr[GraphCOO[VT,ET,WT]] k_truss_subgraph[VT,ET,WT](
- const GraphCOOView[VT,ET,WT] &graph,
- int k) except +
diff --git a/python/cugraph/cugraph/community/ktruss_subgraph.py b/python/cugraph/cugraph/community/ktruss_subgraph.py
index 0ebbe633317..15a10007610 100644
--- a/python/cugraph/cugraph/community/ktruss_subgraph.py
+++ b/python/cugraph/cugraph/community/ktruss_subgraph.py
@@ -11,14 +11,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from cugraph.community import ktruss_subgraph_wrapper
from cugraph.structure.graph_classes import Graph
+from typing import Union
from cugraph.utilities import (
ensure_cugraph_obj_for_nx,
cugraph_to_nx,
)
+from pylibcugraph import k_truss_subgraph as pylibcugraph_k_truss_subgraph
+from pylibcugraph import ResourceHandle
+import warnings
+
from numba import cuda
+import cudf
+from cugraph.utilities.utils import import_optional
+
+# FIXME: the networkx.Graph type used in the type annotation for
+# ktruss_subgraph() is specified using a string literal to avoid depending on
+# and importing networkx. Instead, networkx is imported optionally, which may
+# cause a problem for a type checker if run in an environment where networkx is
+# not installed.
+networkx = import_optional("networkx")
# FIXME: special case for ktruss on CUDA 11.4: an 11.4 bug causes ktruss to
@@ -39,7 +52,9 @@ def _ensure_compatible_cuda_version():
)
-def k_truss(G, k):
+def k_truss(
+ G: Union[Graph, "networkx.Graph"], k: int
+) -> Union[Graph, "networkx.Graph"]:
"""
Returns the K-Truss subgraph of a graph for a specific k.
@@ -90,7 +105,11 @@ def k_truss(G, k):
# FIXME: merge this function with k_truss
-def ktruss_subgraph(G, k, use_weights=True):
+def ktruss_subgraph(
+ G: Union[Graph, "networkx.Graph"],
+ k: int,
+ use_weights=True, # deprecated
+) -> Graph:
"""
Returns the K-Truss subgraph of a graph for a specific k.
@@ -103,7 +122,7 @@ def ktruss_subgraph(G, k, use_weights=True):
finding the maximal k-clique is known to be NP-Hard.
In contrast, finding a k-truss is computationally tractable as its
- key building block, namely triangle counting counting, can be executed
+ key building block, namely triangle counting, can be executed
in polnymomial time.Typically, it takes many iterations of triangle
counting to find the k-truss of a graph. Yet these iterations operate
on a weakly monotonically shrinking graph.
@@ -141,7 +160,10 @@ def ktruss_subgraph(G, k, use_weights=True):
The desired k to be used for extracting the k-truss subgraph.
use_weights : bool, optional (default=True)
- whether the output should contain the edge weights if G has them
+ Whether the output should contain the edge weights if G has them.
+
+ Deprecated: If 'weights' were passed at the graph creation, they will
+ be used.
Returns
-------
@@ -162,7 +184,27 @@ def ktruss_subgraph(G, k, use_weights=True):
if G.is_directed():
raise ValueError("input graph must be undirected")
- subgraph_df = ktruss_subgraph_wrapper.ktruss_subgraph(G, k, use_weights)
+ if use_weights:
+ warning_msg = (
+ "The use_weights flag is deprecated "
+ "and will be removed in the next release. if weights "
+ "were passed at the graph creation, they will be used."
+ )
+ warnings.warn(warning_msg, FutureWarning)
+
+ sources, destinations, edge_weights, _ = pylibcugraph_k_truss_subgraph(
+ resource_handle=ResourceHandle(),
+ graph=G._plc_graph,
+ k=k,
+ do_expensive_check=True,
+ )
+
+ subgraph_df = cudf.DataFrame()
+ subgraph_df["src"] = sources
+ subgraph_df["dst"] = destinations
+ if edge_weights is not None:
+ subgraph_df["weight"] = edge_weights
+
if G.renumbered:
subgraph_df = G.unrenumber(subgraph_df, "src")
subgraph_df = G.unrenumber(subgraph_df, "dst")
diff --git a/python/cugraph/cugraph/community/ktruss_subgraph_wrapper.pyx b/python/cugraph/cugraph/community/ktruss_subgraph_wrapper.pyx
deleted file mode 100644
index 8b705e8a7b4..00000000000
--- a/python/cugraph/cugraph/community/ktruss_subgraph_wrapper.pyx
+++ /dev/null
@@ -1,43 +0,0 @@
-# Copyright (c) 2019-2023, NVIDIA CORPORATION.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# cython: profile=False
-# distutils: language = c++
-# cython: embedsignature = True
-# cython: language_level = 3
-
-from cugraph.community.ktruss_subgraph cimport *
-from cugraph.structure.graph_primtypes cimport *
-from cugraph.structure import graph_primtypes_wrapper
-import numpy as np
-
-
-def ktruss_subgraph_float(input_graph, k, use_weights):
- cdef GraphCOOViewFloat in_graph = get_coo_float_graph_view(input_graph, use_weights)
- return coo_to_df(move(k_truss_subgraph[int,int,float](in_graph, k)))
-
-
-def ktruss_subgraph_double(input_graph, k, use_weights):
- cdef GraphCOOViewDouble in_graph = get_coo_double_graph_view(input_graph, use_weights)
- return coo_to_df(move(k_truss_subgraph[int,int,double](in_graph, k)))
-
-
-def ktruss_subgraph(input_graph, k, use_weights):
- [input_graph.edgelist.edgelist_df['src'],
- input_graph.edgelist.edgelist_df['dst']] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'],
- input_graph.edgelist.edgelist_df['dst']],
- [np.int32])
- if graph_primtypes_wrapper.weight_type(input_graph) == np.float64 and use_weights:
- return ktruss_subgraph_double(input_graph, k, use_weights)
- else:
- return ktruss_subgraph_float(input_graph, k, use_weights)
diff --git a/python/cugraph/cugraph/community/louvain.py b/python/cugraph/cugraph/community/louvain.py
index 7f9742c8f09..0bedd427824 100644
--- a/python/cugraph/cugraph/community/louvain.py
+++ b/python/cugraph/cugraph/community/louvain.py
@@ -11,7 +11,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from typing import Union, Tuple
+from cugraph.structure import Graph
from cugraph.utilities import (
+ is_nx_graph_type,
ensure_cugraph_obj_for_nx,
df_score_to_dictionary,
)
@@ -21,9 +24,26 @@
from pylibcugraph import louvain as pylibcugraph_louvain
from pylibcugraph import ResourceHandle
+from cugraph.utilities.utils import import_optional
+
+# FIXME: the networkx.Graph type used in type annotations is specified
+# using a string literal to avoid depending on and importing networkx.
+# Instead, networkx is imported optionally, which may cause a problem
+# for a type checker if run in an environment where networkx is not installed.
+networkx = import_optional("networkx")
+
+VERTEX_COL_NAME = "vertex"
+CLUSTER_ID_COL_NAME = "partition"
+
# FIXME: max_level should default to 100 once max_iter is removed
-def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
+def louvain(
+ G: Union[Graph, "networkx.Graph"],
+ max_level: Union[int, None] = None,
+ max_iter: Union[int, None] = None,
+ resolution: float = 1.0,
+ threshold: float = 1e-7,
+) -> Tuple[Union[cudf.DataFrame, dict], float]:
"""
Compute the modularity optimizing partition of the input graph using the
Louvain method
@@ -48,6 +68,9 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
than the specified number of levels. No error occurs when the
algorithm terminates early in this manner.
+ If max_level > 500, it will be set to 500 and a warning is emitted
+ in order to prevent excessive runtime.
+
max_iter : integer, optional (default=None)
This parameter is deprecated in favor of max_level. Previously
it was used to control the maximum number of levels of the Louvain
@@ -68,18 +91,21 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
Returns
-------
- parts : cudf.DataFrame
- GPU data frame of size V containing two columns the vertex id and the
- partition id it is assigned to.
+ result: cudf.DataFrame or dict
+ If input graph G is of type cugraph.Graph, a GPU dataframe
+ with two columns.
+
+ result[VERTEX_COL_NAME] : cudf.Series
+ Contains the vertex identifiers
+ result[CLUSTER_ID_COL_NAME] : cudf.Series
+ Contains the partition assigned to the vertices
- df['vertex'] : cudf.Series
- Contains the vertex identifiers
- df['partition'] : cudf.Series
- Contains the partition assigned to the vertices
+ If input graph G is of type networkx.Graph, a dict
+ Dictionary of vertices and their partition ids.
modularity_score : float
- a floating point number containing the global modularity score of the
- partitioning.
+ A floating point number containing the global modularity score
+ of the partitioning.
Examples
--------
@@ -89,6 +115,17 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
"""
+ # FIXME: Once the graph construction calls support isolated vertices through
+ # the C API (the C++ interface already supports this) then there will be
+ # no need to compute isolated vertices here.
+
+ isolated_vertices = list()
+ if is_nx_graph_type(type(G)):
+ isolated_vertices = [v for v in range(G.number_of_nodes()) if G.degree[v] == 0]
+ else:
+ # FIXME: Gather isolated vertices of G
+ pass
+
G, isNx = ensure_cugraph_obj_for_nx(G)
if G.is_directed():
@@ -112,7 +149,12 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
if max_level is None:
max_level = 100
- vertex, partition, mod_score = pylibcugraph_louvain(
+ if max_level > 500:
+ w_msg = "max_level is set too high, clamping it down to 500."
+ warnings.warn(w_msg)
+ max_level = 500
+
+ vertex, partition, modularity_score = pylibcugraph_louvain(
resource_handle=ResourceHandle(),
graph=G._plc_graph,
max_level=max_level,
@@ -121,14 +163,27 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
do_expensive_check=False,
)
- df = cudf.DataFrame()
- df["vertex"] = vertex
- df["partition"] = partition
+ result = cudf.DataFrame()
+ result[VERTEX_COL_NAME] = vertex
+ result[CLUSTER_ID_COL_NAME] = partition
+
+ if len(isolated_vertices) > 0:
+ unique_cids = result[CLUSTER_ID_COL_NAME].unique()
+ max_cluster_id = -1 if len(result) == 0 else unique_cids.max()
+
+ isolated_vtx_and_cids = cudf.DataFrame()
+ isolated_vtx_and_cids[VERTEX_COL_NAME] = isolated_vertices
+ isolated_vtx_and_cids[CLUSTER_ID_COL_NAME] = [
+ (max_cluster_id + i + 1) for i in range(len(isolated_vertices))
+ ]
+ result = cudf.concat(
+ [result, isolated_vtx_and_cids], ignore_index=True, sort=False
+ )
- if G.renumbered:
- df = G.unrenumber(df, "vertex")
+ if G.renumbered and len(G.input_df) > 0:
+ result = G.unrenumber(result, VERTEX_COL_NAME)
if isNx is True:
- df = df_score_to_dictionary(df, "partition")
+ result = df_score_to_dictionary(result, CLUSTER_ID_COL_NAME)
- return df, mod_score
+ return result, modularity_score
diff --git a/python/cugraph/cugraph/dask/link_prediction/jaccard.py b/python/cugraph/cugraph/dask/link_prediction/jaccard.py
index 218e6206fc3..5362c7a9e1e 100644
--- a/python/cugraph/cugraph/dask/link_prediction/jaccard.py
+++ b/python/cugraph/cugraph/dask/link_prediction/jaccard.py
@@ -118,7 +118,9 @@ def jaccard(input_graph, vertex_pair=None, use_weight=False):
adjacent vertices in the graph.
use_weight : bool, optional (default=False)
- Currently not supported
+ Flag to indicate whether to compute weighted jaccard (if use_weight==True)
+ or un-weighted jaccard (if use_weight==False).
+ 'input_graph' must be weighted if 'use_weight=True'.
Returns
-------
@@ -144,12 +146,6 @@ def jaccard(input_graph, vertex_pair=None, use_weight=False):
vertex_pair_col_name = vertex_pair.columns
- if use_weight:
- raise ValueError("'use_weight' is currently not supported.")
-
- if input_graph.is_weighted():
- raise ValueError("Weighted graphs are currently not supported.")
-
if isinstance(vertex_pair, (dask_cudf.DataFrame, cudf.DataFrame)):
vertex_pair = renumber_vertex_pair(input_graph, vertex_pair)
diff --git a/python/cugraph/cugraph/dask/link_prediction/overlap.py b/python/cugraph/cugraph/dask/link_prediction/overlap.py
index 5540be28fd1..4bda05e3c95 100644
--- a/python/cugraph/cugraph/dask/link_prediction/overlap.py
+++ b/python/cugraph/cugraph/dask/link_prediction/overlap.py
@@ -96,7 +96,9 @@ def overlap(input_graph, vertex_pair=None, use_weight=False):
adjacent vertices in the graph.
use_weight : bool, optional (default=False)
- Currently not supported
+ Flag to indicate whether to compute weighted overlap (if use_weight==True)
+ or un-weighted overlap (if use_weight==False).
+ 'input_graph' must be weighted if 'use_weight=True'.
Returns
-------
@@ -122,12 +124,6 @@ def overlap(input_graph, vertex_pair=None, use_weight=False):
vertex_pair_col_name = vertex_pair.columns
- if use_weight:
- raise ValueError("'use_weight' is currently not supported.")
-
- if input_graph.is_weighted():
- raise ValueError("Weighted graphs are currently not supported.")
-
if isinstance(vertex_pair, (dask_cudf.DataFrame, cudf.DataFrame)):
vertex_pair = renumber_vertex_pair(input_graph, vertex_pair)
diff --git a/python/cugraph/cugraph/dask/link_prediction/sorensen.py b/python/cugraph/cugraph/dask/link_prediction/sorensen.py
index 24295ac330c..163b0d0dc16 100644
--- a/python/cugraph/cugraph/dask/link_prediction/sorensen.py
+++ b/python/cugraph/cugraph/dask/link_prediction/sorensen.py
@@ -92,7 +92,9 @@ def sorensen(input_graph, vertex_pair=None, use_weight=False):
adjacent vertices in the graph.
use_weight : bool, optional (default=False)
- Currently not supported
+ Flag to indicate whether to compute weighted sorensen (if use_weight==True)
+ or un-weighted sorensen (if use_weight==False).
+ 'input_graph' must be weighted if 'use_weight=True'.
Returns
-------
@@ -118,12 +120,6 @@ def sorensen(input_graph, vertex_pair=None, use_weight=False):
vertex_pair_col_name = vertex_pair.columns
- if use_weight:
- raise ValueError("'use_weight' is currently not supported.")
-
- if input_graph.is_weighted():
- raise ValueError("Weighted graphs are currently not supported.")
-
if isinstance(vertex_pair, (dask_cudf.DataFrame, cudf.DataFrame)):
vertex_pair = renumber_vertex_pair(input_graph, vertex_pair)
diff --git a/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py b/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py
index 9e50169b4a7..03746561817 100644
--- a/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py
+++ b/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py
@@ -42,6 +42,7 @@
if TYPE_CHECKING:
from cugraph import Graph
+
src_n = "sources"
dst_n = "destinations"
indices_n = "indices"
@@ -71,8 +72,21 @@ def create_empty_df(indices_t, weight_t):
def create_empty_df_with_edge_props(
- indices_t, weight_t, return_offsets=False, renumber=False
+ indices_t,
+ weight_t,
+ return_offsets=False,
+ renumber=False,
+ use_legacy_names=True,
+ include_hop_column=True,
+ compression="COO",
):
+ if compression != "COO":
+ majors_name = "major_offsets"
+ else:
+ majors_name = src_n if use_legacy_names else "majors"
+
+ minors_name = dst_n if use_legacy_names else "minors"
+
if renumber:
empty_df_renumber = cudf.DataFrame(
{
@@ -84,14 +98,17 @@ def create_empty_df_with_edge_props(
if return_offsets:
df = cudf.DataFrame(
{
- src_n: numpy.empty(shape=0, dtype=indices_t),
- dst_n: numpy.empty(shape=0, dtype=indices_t),
+ majors_name: numpy.empty(shape=0, dtype=indices_t),
+ minors_name: numpy.empty(shape=0, dtype=indices_t),
weight_n: numpy.empty(shape=0, dtype=weight_t),
edge_id_n: numpy.empty(shape=0, dtype=indices_t),
edge_type_n: numpy.empty(shape=0, dtype="int32"),
- hop_id_n: numpy.empty(shape=0, dtype="int32"),
}
)
+
+ if include_hop_column:
+ df[hop_id_n] = numpy.empty(shape=0, dtype="int32")
+
empty_df_offsets = cudf.DataFrame(
{
offsets_n: numpy.empty(shape=0, dtype="int32"),
@@ -106,13 +123,13 @@ def create_empty_df_with_edge_props(
else:
df = cudf.DataFrame(
{
- src_n: numpy.empty(shape=0, dtype=indices_t),
- dst_n: numpy.empty(shape=0, dtype=indices_t),
+ majors_name: numpy.empty(shape=0, dtype=indices_t),
+ minors_name: numpy.empty(shape=0, dtype=indices_t),
weight_n: numpy.empty(shape=0, dtype=weight_t),
edge_id_n: numpy.empty(shape=0, dtype=indices_t),
edge_type_n: numpy.empty(shape=0, dtype="int32"),
- hop_id_n: numpy.empty(shape=0, dtype="int32"),
batch_id_n: numpy.empty(shape=0, dtype="int32"),
+ hop_id_n: numpy.empty(shape=0, dtype="int32"),
}
)
if renumber:
@@ -121,102 +138,6 @@ def create_empty_df_with_edge_props(
return df
-def convert_to_cudf(
- cp_arrays, weight_t, with_edge_properties, return_offsets=False, renumber=False
-):
- """
- Creates a cudf DataFrame from cupy arrays from pylibcugraph wrapper
- """
- df = cudf.DataFrame()
-
- if with_edge_properties:
- if renumber:
- (
- sources,
- destinations,
- weights,
- edge_ids,
- edge_types,
- batch_ids,
- offsets,
- hop_ids,
- renumber_map,
- renumber_map_offsets,
- ) = cp_arrays
- else:
- (
- sources,
- destinations,
- weights,
- edge_ids,
- edge_types,
- batch_ids,
- offsets,
- hop_ids,
- ) = cp_arrays
-
- df[src_n] = sources
- df[dst_n] = destinations
- df[weight_n] = weights
- df[edge_id_n] = edge_ids
- df[edge_type_n] = edge_types
- df[hop_id_n] = hop_ids
-
- return_dfs = [df]
-
- if return_offsets:
- offsets_df = cudf.DataFrame(
- {
- batch_id_n: batch_ids,
- offsets_n: offsets[:-1],
- }
- )
-
- if renumber:
- offsets_df[map_offsets_n] = renumber_map_offsets[:-1]
-
- return_dfs.append(offsets_df)
- else:
- batch_ids_b = batch_ids
- if len(batch_ids_b) > 0:
- batch_ids_b = cudf.Series(batch_ids_b).repeat(cp.diff(offsets))
- batch_ids_b.reset_index(drop=True, inplace=True)
-
- df[batch_id_n] = batch_ids_b
-
- if renumber:
- renumber_df = cudf.DataFrame(
- {
- "map": renumber_map,
- }
- )
-
- if not return_offsets:
- batch_ids_r = cudf.Series(batch_ids).repeat(
- cp.diff(renumber_map_offsets)
- )
- batch_ids_r.reset_index(drop=True, inplace=True)
- renumber_df["batch_id"] = batch_ids_r
-
- return_dfs.append(renumber_df)
-
- return tuple(return_dfs)
- else:
- cupy_sources, cupy_destinations, cupy_indices = cp_arrays
-
- df[src_n] = cupy_sources
- df[dst_n] = cupy_destinations
- df[indices_n] = cupy_indices
-
- if cupy_indices is not None:
- if weight_t == "int32":
- df.indices = df.indices.astype("int32")
- elif weight_t == "int64":
- df.indices = df.indices.astype("int64")
-
- return (df,)
-
-
def __get_label_to_output_comm_rank(min_batch_id, max_batch_id, n_workers):
num_batches = max_batch_id - min_batch_id + 1
num_batches = int(num_batches)
@@ -246,6 +167,10 @@ def _call_plc_uniform_neighbor_sample(
prior_sources_behavior=None,
deduplicate_sources=False,
renumber=False,
+ use_legacy_names=True,
+ include_hop_column=True,
+ compress_per_hop=False,
+ compression="COO",
):
st_x = st_x[0]
start_list_x = st_x[start_col_name]
@@ -259,7 +184,7 @@ def _call_plc_uniform_neighbor_sample(
min_batch_id, max_batch_id, n_workers
)
- cp_arrays = pylibcugraph_uniform_neighbor_sample(
+ cupy_array_dict = pylibcugraph_uniform_neighbor_sample(
resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()),
input_graph=mg_graph_x,
start_list=start_list_x,
@@ -275,13 +200,25 @@ def _call_plc_uniform_neighbor_sample(
deduplicate_sources=deduplicate_sources,
return_hops=return_hops,
renumber=renumber,
+ compression=compression,
+ compress_per_hop=compress_per_hop,
+ return_dict=True,
+ )
+
+ # have to import here due to circular import issue
+ from cugraph.sampling.sampling_utilities import (
+ sampling_results_from_cupy_array_dict,
)
- return convert_to_cudf(
- cp_arrays,
+
+ return sampling_results_from_cupy_array_dict(
+ cupy_array_dict,
weight_t,
- with_edge_properties,
+ len(fanout_vals),
+ with_edge_properties=with_edge_properties,
return_offsets=return_offsets,
renumber=renumber,
+ use_legacy_names=use_legacy_names,
+ include_hop_column=include_hop_column,
)
@@ -304,6 +241,10 @@ def _mg_call_plc_uniform_neighbor_sample(
prior_sources_behavior=None,
deduplicate_sources=False,
renumber=False,
+ use_legacy_names=True,
+ include_hop_column=True,
+ compress_per_hop=False,
+ compression="COO",
):
n_workers = None
if keep_batches_together:
@@ -335,6 +276,10 @@ def _mg_call_plc_uniform_neighbor_sample(
prior_sources_behavior=prior_sources_behavior,
deduplicate_sources=deduplicate_sources,
renumber=renumber,
+ use_legacy_names=use_legacy_names, # remove in 23.12
+ include_hop_column=include_hop_column, # remove in 23.12
+ compress_per_hop=compress_per_hop,
+ compression=compression,
allow_other_workers=False,
pure=False,
)
@@ -348,6 +293,9 @@ def _mg_call_plc_uniform_neighbor_sample(
weight_t,
return_offsets=return_offsets,
renumber=renumber,
+ use_legacy_names=use_legacy_names,
+ compression=compression,
+ include_hop_column=include_hop_column,
)
if with_edge_properties
else create_empty_df(indices_t, weight_t)
@@ -397,6 +345,7 @@ def uniform_neighbor_sample(
input_graph: Graph,
start_list: Sequence,
fanout_vals: List[int],
+ *,
with_replacement: bool = True,
with_edge_properties: bool = False, # deprecated
with_batch_ids: bool = False,
@@ -406,9 +355,13 @@ def uniform_neighbor_sample(
random_state: int = None,
return_offsets: bool = False,
return_hops: bool = True,
+ include_hop_column: bool = True, # deprecated
prior_sources_behavior: str = None,
deduplicate_sources: bool = False,
renumber: bool = False,
+ use_legacy_names=True, # deprecated
+ compress_per_hop=False,
+ compression="COO",
_multiple_clients: bool = False,
) -> Union[dask_cudf.DataFrame, Tuple[dask_cudf.DataFrame, dask_cudf.DataFrame]]:
"""
@@ -463,6 +416,12 @@ def uniform_neighbor_sample(
corresponding to the hop where the edge appeared.
Defaults to True.
+ include_hop_column: bool, optional (default=True)
+ Deprecated. Defaults to True.
+ If True, will include the hop column even if
+ return_offsets is True. This option will
+ be removed in release 23.12.
+
prior_sources_behavior: str (Optional)
Options are "carryover", and "exclude".
Default will leave the source list as-is.
@@ -481,6 +440,21 @@ def uniform_neighbor_sample(
will return the renumber map and renumber map offsets
as an additional dataframe.
+ use_legacy_names: bool, optional (default=True)
+ Whether to use the legacy column names (sources, destinations).
+ If True, will use "sources" and "destinations" as the column names.
+ If False, will use "majors" and "minors" as the column names.
+ Deprecated. Will be removed in release 23.12 in favor of always
+ using the new names "majors" and "minors".
+
+ compress_per_hop: bool, optional (default=False)
+ Whether to compress globally (default), or to produce a separate
+ compressed edgelist per hop.
+
+ compression: str, optional (default=COO)
+ Sets the compression type for the output minibatches.
+ Valid options are COO (default), CSR, CSC, DCSR, and DCSC.
+
_multiple_clients: bool, optional (default=False)
internal flag to ensure sampling works with multiple dask clients
set to True to prevent hangs in multi-client environment
@@ -548,12 +522,46 @@ def uniform_neighbor_sample(
Contains the batch offsets for the renumber maps
"""
+ if compression not in ["COO", "CSR", "CSC", "DCSR", "DCSC"]:
+ raise ValueError("compression must be one of COO, CSR, CSC, DCSR, or DCSC")
+
if with_edge_properties:
warning_msg = (
"The with_edge_properties flag is deprecated"
" and will be removed in the next release."
)
- warnings.warn(warning_msg, DeprecationWarning)
+ warnings.warn(warning_msg, FutureWarning)
+
+ if (
+ (compression != "COO")
+ and (not compress_per_hop)
+ and prior_sources_behavior != "exclude"
+ ):
+ raise ValueError(
+ "hop-agnostic compression is only supported with"
+ " the exclude prior sources behavior due to limitations "
+ "of the libcugraph C++ API"
+ )
+
+ if compress_per_hop and prior_sources_behavior != "carryover":
+ raise ValueError(
+ "Compressing the edgelist per hop is only supported "
+ "with the carryover prior sources behavior due to limitations"
+ " of the libcugraph C++ API"
+ )
+
+ if include_hop_column:
+ warning_msg = (
+ "The include_hop_column flag is deprecated and will be"
+ " removed in the next release in favor of always "
+ "excluding the hop column when return_offsets is True"
+ )
+ warnings.warn(warning_msg, FutureWarning)
+
+ if compression != "COO":
+ raise ValueError(
+ "Including the hop id column is only supported with COO compression."
+ )
if isinstance(start_list, int):
start_list = [start_list]
@@ -643,6 +651,31 @@ def uniform_neighbor_sample(
ddf = persist_dask_df_equal_parts_per_worker(ddf, client)
ddf = get_persisted_df_worker_map(ddf, client)
+ sample_call_kwargs = {
+ "client": client,
+ "session_id": session_id,
+ "input_graph": input_graph,
+ "ddf": ddf,
+ "keep_batches_together": keep_batches_together,
+ "min_batch_id": min_batch_id,
+ "max_batch_id": max_batch_id,
+ "fanout_vals": fanout_vals,
+ "with_replacement": with_replacement,
+ "weight_t": weight_t,
+ "indices_t": indices_t,
+ "with_edge_properties": with_edge_properties,
+ "random_state": random_state,
+ "return_offsets": return_offsets,
+ "return_hops": return_hops,
+ "prior_sources_behavior": prior_sources_behavior,
+ "deduplicate_sources": deduplicate_sources,
+ "renumber": renumber,
+ "use_legacy_names": use_legacy_names,
+ "include_hop_column": include_hop_column,
+ "compress_per_hop": compress_per_hop,
+ "compression": compression,
+ }
+
if _multiple_clients:
# Distributed centralized lock to allow
# two disconnected processes (clients) to coordinate a lock
@@ -650,26 +683,7 @@ def uniform_neighbor_sample(
lock = Lock("plc_graph_access")
if lock.acquire(timeout=100):
try:
- ddf = _mg_call_plc_uniform_neighbor_sample(
- client=client,
- session_id=session_id,
- input_graph=input_graph,
- ddf=ddf,
- keep_batches_together=keep_batches_together,
- min_batch_id=min_batch_id,
- max_batch_id=max_batch_id,
- fanout_vals=fanout_vals,
- with_replacement=with_replacement,
- weight_t=weight_t,
- indices_t=indices_t,
- with_edge_properties=with_edge_properties,
- random_state=random_state,
- return_offsets=return_offsets,
- return_hops=return_hops,
- prior_sources_behavior=prior_sources_behavior,
- deduplicate_sources=deduplicate_sources,
- renumber=renumber,
- )
+ ddf = _mg_call_plc_uniform_neighbor_sample(**sample_call_kwargs)
finally:
lock.release()
else:
@@ -677,26 +691,7 @@ def uniform_neighbor_sample(
"Failed to acquire lock(plc_graph_access) while trying to sampling"
)
else:
- ddf = _mg_call_plc_uniform_neighbor_sample(
- client=client,
- session_id=session_id,
- input_graph=input_graph,
- ddf=ddf,
- keep_batches_together=keep_batches_together,
- min_batch_id=min_batch_id,
- max_batch_id=max_batch_id,
- fanout_vals=fanout_vals,
- with_replacement=with_replacement,
- weight_t=weight_t,
- indices_t=indices_t,
- with_edge_properties=with_edge_properties,
- random_state=random_state,
- return_offsets=return_offsets,
- return_hops=return_hops,
- prior_sources_behavior=prior_sources_behavior,
- deduplicate_sources=deduplicate_sources,
- renumber=renumber,
- )
+ ddf = _mg_call_plc_uniform_neighbor_sample(**sample_call_kwargs)
if return_offsets:
if renumber:
@@ -708,9 +703,12 @@ def uniform_neighbor_sample(
ddf, renumber_df = ddf
if input_graph.renumbered and not renumber:
- ddf = input_graph.unrenumber(ddf, "sources", preserve_order=True)
- ddf = input_graph.unrenumber(ddf, "destinations", preserve_order=True)
-
+ if use_legacy_names:
+ ddf = input_graph.unrenumber(ddf, "sources", preserve_order=True)
+ ddf = input_graph.unrenumber(ddf, "destinations", preserve_order=True)
+ else:
+ ddf = input_graph.unrenumber(ddf, "majors", preserve_order=True)
+ ddf = input_graph.unrenumber(ddf, "minors", preserve_order=True)
if return_offsets:
if renumber:
return ddf, offsets_df, renumber_df
diff --git a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py
index 92caba6dbaf..dbfcb124ce5 100644
--- a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py
+++ b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py
@@ -269,6 +269,7 @@ def flush(self) -> None:
with_edge_properties=True,
return_offsets=True,
renumber=self.__renumber,
+ # use_legacy_names=False,
)
if self.__renumber:
diff --git a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py
index e9e5be26fc3..7e67eab83c9 100644
--- a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py
+++ b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py
@@ -15,10 +15,24 @@
import cudf
import cupy
-from typing import Union, Optional
+from math import ceil
+from pandas import isna
-def _write_samples_to_parquet(
+from typing import Union, Optional, List
+
+
+def create_df_from_disjoint_series(series_list: List[cudf.Series]):
+ series_list.sort(key=lambda s: len(s), reverse=True)
+
+ df = cudf.DataFrame()
+ for s in series_list:
+ df[s.name] = s
+
+ return df
+
+
+def _write_samples_to_parquet_csr(
results: cudf.DataFrame,
offsets: cudf.DataFrame,
renumber_map: cudf.DataFrame,
@@ -27,7 +41,184 @@ def _write_samples_to_parquet(
partition_info: Optional[Union[dict, str]] = None,
) -> cudf.Series:
"""
- Writes the samples to parquet.
+ Writes CSR/CSC compressed samples to parquet.
+
+ Batches that are empty are discarded, and the remaining non-empty
+ batches are renumbered to be contiguous starting from the first
+ batch id. This means that the output batch ids may not match
+ the input batch ids.
+
+ results: cudf.DataFrame
+ The results dataframe containing the sampled minibatches.
+ offsets: cudf.DataFrame
+ The offsets dataframe indicating the start/end of each minibatch
+ in the results dataframe.
+ renumber_map: cudf.DataFrame
+ The renumber map containing the mapping of renumbered vertex ids
+ to original vertex ids.
+ batches_per_partition: int
+ The maximum number of minibatches allowed per written parquet partition.
+ output_path: str
+ The output path (where parquet files should be written to).
+ partition_info: Union[dict, str]
+ Either a dictionary containing partition data from dask, the string 'sg'
+ indicating that this is a single GPU write, or None indicating that this
+ function should perform a no-op (required by dask).
+
+ Returns an empty cudf series.
+ """
+ # Required by dask; need to skip dummy partitions.
+ if partition_info is None or len(results) == 0:
+ return cudf.Series(dtype="int64")
+ if partition_info != "sg" and (not isinstance(partition_info, dict)):
+ raise ValueError("Invalid value of partition_info")
+
+ # Additional check to skip dummy partitions required for CSR format.
+ if isna(offsets.batch_id.iloc[0]):
+ return cudf.Series(dtype="int64")
+
+ # Output:
+ # major_offsets - CSR/CSC row/col pointers
+ # minors - CSR/CSC col/row indices
+ # edge id - edge ids (same shape as minors)
+ # edge type - edge types (same shape as minors)
+ # weight - edge weight (same shape as minors)
+ # renumber map - the original vertex ids
+ # renumber map offsets - start/end of the map for each batch
+ # (only 1 per batch b/c of framework
+ # stipulations making this legal)
+ # label-hop offsets - indicate the start/end of each hop
+ # for each batch
+
+ batch_ids = offsets.batch_id
+ label_hop_offsets = offsets.offsets
+ renumber_map_offsets = offsets.renumber_map_offsets
+ del offsets
+
+ batch_ids.dropna(inplace=True)
+ label_hop_offsets.dropna(inplace=True)
+ renumber_map_offsets.dropna(inplace=True)
+
+ major_offsets_array = results.major_offsets
+ results.drop(columns="major_offsets", inplace=True)
+ major_offsets_array.dropna(inplace=True)
+ major_offsets_array = major_offsets_array.values
+
+ minors_array = results.minors
+ results.drop(columns="minors", inplace=True)
+ minors_array.dropna(inplace=True)
+ minors_array = minors_array.values
+
+ weight_array = results.weight
+ results.drop(columns="weight", inplace=True)
+ weight_array.dropna(inplace=True)
+ weight_array = (
+ cupy.array([], dtype="float32") if weight_array.empty else weight_array.values
+ )
+
+ edge_id_array = results.edge_id
+ results.drop(columns="edge_id", inplace=True)
+ edge_id_array.dropna(inplace=True)
+ edge_id_array = (
+ cupy.array([], dtype="int64") if edge_id_array.empty else edge_id_array.values
+ )
+
+ edge_type_array = results.edge_type
+ results.drop(columns="edge_type", inplace=True)
+ edge_type_array.dropna(inplace=True)
+ edge_type_array = (
+ cupy.array([], dtype="int32")
+ if edge_type_array.empty
+ else edge_type_array.values
+ )
+
+ del results
+
+ offsets_length = len(label_hop_offsets) - 1
+ if offsets_length % len(batch_ids) != 0:
+ raise ValueError("Invalid hop offsets")
+ fanout_length = int(offsets_length / len(batch_ids))
+
+ for p in range(0, int(ceil(len(batch_ids) / batches_per_partition))):
+ partition_start = p * (batches_per_partition)
+ partition_end = (p + 1) * (batches_per_partition)
+
+ label_hop_offsets_current_partition = label_hop_offsets.iloc[
+ partition_start * fanout_length : partition_end * fanout_length + 1
+ ].reset_index(drop=True)
+ label_hop_offsets_current_partition.name = "label_hop_offsets"
+
+ batch_ids_current_partition = batch_ids.iloc[partition_start:partition_end]
+
+ (
+ major_offsets_start,
+ major_offsets_end,
+ ) = label_hop_offsets_current_partition.iloc[
+ [0, -1]
+ ].values # legal since offsets has the 1 extra offset
+ results_start, results_end = major_offsets_array[
+ [major_offsets_start, major_offsets_end]
+ ] # avoid d2h copy
+
+ # no need to use end batch id, just ensure the batch is labeled correctly
+ start_batch_id = batch_ids_current_partition.iloc[0]
+ # end_batch_id = batch_ids_current_partition.iloc[-1]
+
+ # create the renumber map offsets
+ renumber_map_offsets_current_partition = renumber_map_offsets.iloc[
+ partition_start : partition_end + 1
+ ].reset_index(drop=True)
+ renumber_map_offsets_current_partition.name = "renumber_map_offsets"
+
+ (
+ renumber_map_start,
+ renumber_map_end,
+ ) = renumber_map_offsets_current_partition.iloc[
+ [0, -1]
+ ].values # avoid d2h copy
+
+ results_current_partition = create_df_from_disjoint_series(
+ [
+ cudf.Series(minors_array[results_start:results_end], name="minors"),
+ cudf.Series(
+ renumber_map.map.values[renumber_map_start:renumber_map_end],
+ name="map",
+ ),
+ label_hop_offsets_current_partition,
+ cudf.Series(
+ major_offsets_array[major_offsets_start : major_offsets_end + 1],
+ name="major_offsets",
+ ),
+ cudf.Series(weight_array[results_start:results_end], name="weight"),
+ cudf.Series(edge_id_array[results_start:results_end], name="edge_id"),
+ cudf.Series(
+ edge_type_array[results_start:results_end], name="edge_type"
+ ),
+ renumber_map_offsets_current_partition,
+ ]
+ )
+
+ end_batch_id = start_batch_id + len(batch_ids_current_partition) - 1
+ filename = f"batch={start_batch_id}-{end_batch_id}.parquet"
+ full_output_path = os.path.join(output_path, filename)
+
+ results_current_partition.to_parquet(
+ full_output_path, compression=None, index=False, force_nullable_schema=True
+ )
+
+ return cudf.Series(dtype="int64")
+
+
+def _write_samples_to_parquet_coo(
+ results: cudf.DataFrame,
+ offsets: cudf.DataFrame,
+ renumber_map: cudf.DataFrame,
+ batches_per_partition: int,
+ output_path: str,
+ partition_info: Optional[Union[dict, str]] = None,
+) -> cudf.Series:
+ """
+ Writes COO compressed samples to parquet.
Batches that are empty are discarded, and the remaining non-empty
batches are renumbered to be contiguous starting from the first
@@ -60,8 +251,10 @@ def _write_samples_to_parquet(
if partition_info != "sg" and (not isinstance(partition_info, dict)):
raise ValueError("Invalid value of partition_info")
+ offsets = offsets[:-1]
+
# Offsets is always in order, so the last batch id is always the highest
- max_batch_id = offsets.batch_id.iloc[len(offsets) - 1]
+ max_batch_id = offsets.batch_id.iloc[-1]
results.dropna(axis=1, how="all", inplace=True)
results["hop_id"] = results["hop_id"].astype("uint8")
@@ -182,9 +375,23 @@ def write_samples(
output_path: str
The output path (where parquet files should be written to).
"""
+
+ if ("majors" in results.columns) and ("minors" in results.columns):
+ write_fn = _write_samples_to_parquet_coo
+
+ # TODO these names will be deprecated in release 23.12
+ elif ("sources" in results.columns) and ("destinations" in results.columns):
+ write_fn = _write_samples_to_parquet_coo
+
+ elif "major_offsets" in results.columns and "minors" in results.columns:
+ write_fn = _write_samples_to_parquet_csr
+
+ else:
+ raise ValueError("invalid columns")
+
if hasattr(results, "compute"):
results.map_partitions(
- _write_samples_to_parquet,
+ write_fn,
offsets,
renumber_map,
batches_per_partition,
@@ -194,7 +401,7 @@ def write_samples(
).compute()
else:
- _write_samples_to_parquet(
+ write_fn(
results,
offsets,
renumber_map,
diff --git a/python/cugraph/cugraph/gnn/feature_storage/feat_storage.py b/python/cugraph/cugraph/gnn/feature_storage/feat_storage.py
index e3fdeb7f150..77a53882fc4 100644
--- a/python/cugraph/cugraph/gnn/feature_storage/feat_storage.py
+++ b/python/cugraph/cugraph/gnn/feature_storage/feat_storage.py
@@ -17,23 +17,77 @@
import cupy as cp
import numpy as np
import pandas as pd
-from cugraph.utilities.utils import import_optional
+from cugraph.utilities.utils import import_optional, MissingModule
torch = import_optional("torch")
+wgth = import_optional("pylibwholegraph.torch")
class FeatureStore:
- """The feature-store class used to store feature data for GNNS"""
+ """The feature-store class used to store feature data for GNNs"""
+
+ def __init__(
+ self,
+ backend: str = "numpy",
+ wg_comm: object = None,
+ wg_type: str = None,
+ wg_location: str = None,
+ ):
+ """
+ Constructs a new FeatureStore object
+
+ Parameters:
+ ----------
+ backend: str ('numpy', 'torch', 'wholegraph')
+ Optional (default='numpy')
+ The name of the backend to use.
+
+ wg_comm: WholeMemoryCommunicator
+ Optional (default=automatic)
+ Only used with the 'wholegraph' backend.
+ The communicator to use to store features in WholeGraph.
+
+ wg_type: str ('distributed', 'continuous', 'chunked')
+ Optional (default='distributed')
+ Only used with the 'wholegraph' backend.
+ The memory format (distributed, continuous, or chunked) of
+ this FeatureStore. For more information see the WholeGraph
+ documentation.
+
+ wg_location: str ('cpu', 'cuda')
+ Optional (default='cuda')
+ Only used with the 'wholegraph' backend.
+ Where the data is stored (cpu or cuda).
+ Defaults to storing on the GPU (cuda).
+ """
- def __init__(self, backend="numpy"):
self.fd = defaultdict(dict)
- if backend not in ["numpy", "torch"]:
+ if backend not in ["numpy", "torch", "wholegraph"]:
raise ValueError(
- f"backend {backend} not supported. Supported backends are numpy, torch"
+ f"backend {backend} not supported. "
+ "Supported backends are numpy, torch, wholegraph"
)
self.backend = backend
- def add_data(self, feat_obj: Sequence, type_name: str, feat_name: str) -> None:
+ self.__wg_comm = None
+ self.__wg_type = None
+ self.__wg_location = None
+
+ if backend == "wholegraph":
+ self.__wg_comm = (
+ wg_comm if wg_comm is not None else wgth.get_local_node_communicator()
+ )
+ self.__wg_type = wg_type if wg_type is not None else "distributed"
+ self.__wg_location = wg_location if wg_location is not None else "cuda"
+
+ if self.__wg_type not in ["distributed", "chunked", "continuous"]:
+ raise ValueError(f"invalid memory format {self.__wg_type}")
+ if (self.__wg_location != "cuda") and (self.__wg_location != "cpu"):
+ raise ValueError(f"invalid location {self.__wg_location}")
+
+ def add_data(
+ self, feat_obj: Sequence, type_name: str, feat_name: str, **kwargs
+ ) -> None:
"""
Add the feature data to the feature_storage class
Parameters:
@@ -49,9 +103,31 @@ def add_data(self, feat_obj: Sequence, type_name: str, feat_name: str) -> None:
None
"""
self.fd[feat_name][type_name] = self._cast_feat_obj_to_backend(
- feat_obj, self.backend
+ feat_obj,
+ self.backend,
+ wg_comm=self.__wg_comm,
+ wg_type=self.__wg_type,
+ wg_location=self.__wg_location,
+ **kwargs,
)
+ def add_data_no_cast(self, feat_obj, type_name: str, feat_name: str) -> None:
+ """
+ Directly add the feature data to the feature_storage class with no cast
+ Parameters:
+ ----------
+ feat_obj : array_like object
+ The feature object to save in feature store
+ type_name : str
+ The node-type/edge-type of the feature
+ feat_name: str
+ The name of the feature being stored
+ Returns:
+ -------
+ None
+ """
+ self.fd[feat_name][type_name] = feat_obj
+
def get_data(
self,
indices: Union[np.ndarray, torch.Tensor],
@@ -87,26 +163,67 @@ def get_data(
f" feature: {list(self.fd[feat_name].keys())}"
)
- return self.fd[feat_name][type_name][indices]
+ feat = self.fd[feat_name][type_name]
+ if not isinstance(wgth, MissingModule) and isinstance(
+ feat, wgth.WholeMemoryEmbedding
+ ):
+ indices_tensor = (
+ indices
+ if isinstance(indices, torch.Tensor)
+ else torch.as_tensor(indices, device="cuda")
+ )
+ return feat.gather(indices_tensor)
+ else:
+ return feat[indices]
def get_feature_list(self) -> list[str]:
return {feat_name: feats.keys() for feat_name, feats in self.fd.items()}
@staticmethod
- def _cast_feat_obj_to_backend(feat_obj, backend: str):
+ def _cast_feat_obj_to_backend(feat_obj, backend: str, **kwargs):
if backend == "numpy":
if isinstance(feat_obj, (cudf.DataFrame, pd.DataFrame)):
- return _cast_to_numpy_ar(feat_obj.values)
+ return _cast_to_numpy_ar(feat_obj.values, **kwargs)
else:
- return _cast_to_numpy_ar(feat_obj)
+ return _cast_to_numpy_ar(feat_obj, **kwargs)
elif backend == "torch":
if isinstance(feat_obj, (cudf.DataFrame, pd.DataFrame)):
- return _cast_to_torch_tensor(feat_obj.values)
+ return _cast_to_torch_tensor(feat_obj.values, **kwargs)
else:
- return _cast_to_torch_tensor(feat_obj)
+ return _cast_to_torch_tensor(feat_obj, **kwargs)
+ elif backend == "wholegraph":
+ return _get_wg_embedding(feat_obj, **kwargs)
+
+def _get_wg_embedding(feat_obj, wg_comm=None, wg_type=None, wg_location=None, **kwargs):
+ wg_comm_obj = wg_comm or wgth.get_local_node_communicator()
+ wg_type_str = wg_type or "distributed"
+ wg_location_str = wg_location or "cuda"
-def _cast_to_torch_tensor(ar):
+ if isinstance(feat_obj, (cudf.DataFrame, pd.DataFrame)):
+ th_tensor = _cast_to_torch_tensor(feat_obj.values)
+ else:
+ th_tensor = _cast_to_torch_tensor(feat_obj)
+ wg_embedding = wgth.create_embedding(
+ wg_comm_obj,
+ wg_type_str,
+ wg_location_str,
+ th_tensor.dtype,
+ th_tensor.shape,
+ )
+ (
+ local_wg_tensor,
+ local_ld_offset,
+ ) = wg_embedding.get_embedding_tensor().get_local_tensor()
+ local_th_tensor = th_tensor[
+ local_ld_offset : local_ld_offset + local_wg_tensor.shape[0]
+ ]
+ local_wg_tensor.copy_(local_th_tensor)
+ wg_comm_obj.barrier()
+ return wg_embedding
+
+
+def _cast_to_torch_tensor(ar, **kwargs):
if isinstance(ar, cp.ndarray):
ar = torch.as_tensor(ar, device="cuda")
elif isinstance(ar, np.ndarray):
@@ -116,7 +233,7 @@ def _cast_to_torch_tensor(ar):
return ar
-def _cast_to_numpy_ar(ar):
+def _cast_to_numpy_ar(ar, **kwargs):
if isinstance(ar, cp.ndarray):
ar = ar.get()
elif type(ar).__name__ == "Tensor":
diff --git a/python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx b/python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx
index 4258be3ef71..5a2784e2363 100644
--- a/python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx
+++ b/python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx
@@ -56,9 +56,11 @@ def force_atlas2(input_graph,
if not input_graph.edgelist:
input_graph.view_edge_list()
- # FIXME: This implementation assumes that the number of vertices
- # is the max vertex ID + 1 which is not always the case.
- num_verts = input_graph.nodes().max() + 1
+ # this code allows handling of renumbered graphs
+ if input_graph.is_renumbered():
+ num_verts = input_graph.renumber_map.df_internal_to_external['id'].max()+1
+ else:
+ num_verts = input_graph.nodes().max() + 1
num_edges = len(input_graph.edgelist.edgelist_df['src'])
cdef GraphCOOView[int,int,float] graph_float
diff --git a/python/cugraph/cugraph/sampling/sampling_utilities.py b/python/cugraph/cugraph/sampling/sampling_utilities.py
new file mode 100644
index 00000000000..50c315129dc
--- /dev/null
+++ b/python/cugraph/cugraph/sampling/sampling_utilities.py
@@ -0,0 +1,198 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import cupy
+import cudf
+
+import warnings
+
+
+def sampling_results_from_cupy_array_dict(
+ cupy_array_dict,
+ weight_t,
+ num_hops,
+ with_edge_properties=False,
+ return_offsets=False,
+ renumber=False,
+ use_legacy_names=True,
+ include_hop_column=True,
+):
+ """
+ Creates cudf DataFrames from the pylibcugraph wrapper's dict of cupy arrays
+ """
+ results_df = cudf.DataFrame()
+
+ if use_legacy_names:
+ major_col_name = "sources"
+ minor_col_name = "destinations"
+ warning_msg = (
+ "The legacy column names (sources, destinations)"
+ " will no longer be supported for uniform_neighbor_sample"
+ " in release 23.12. The use_legacy_names=False option will"
+ " become the only option, and (majors, minors) will be the"
+ " only supported column names."
+ )
+ warnings.warn(warning_msg, FutureWarning)
+ else:
+ major_col_name = "majors"
+ minor_col_name = "minors"
+
+ if with_edge_properties:
+ majors = cupy_array_dict["majors"]
+ if majors is not None:
+ results_df["majors"] = majors
+
+ results_df_cols = [
+ "minors",
+ "weight",
+ "edge_id",
+ "edge_type",
+ ]
+
+ for col in results_df_cols:
+ array = cupy_array_dict[col]
+ # The length of each of these arrays should be the same
+ results_df[col] = array
+
+ results_df.rename(
+ columns={"majors": major_col_name, "minors": minor_col_name}, inplace=True
+ )
+
+ label_hop_offsets = cupy_array_dict["label_hop_offsets"]
+ batch_ids = cupy_array_dict["batch_id"]
+
+ if renumber:
+ renumber_df = cudf.DataFrame(
+ {
+ "map": cupy_array_dict["renumber_map"],
+ }
+ )
+
+ if not return_offsets:
+ if len(batch_ids) > 0:
+ batch_ids_r = cudf.Series(batch_ids).repeat(
+ cupy.diff(cupy_array_dict["renumber_map_offsets"])
+ )
+ batch_ids_r.reset_index(drop=True, inplace=True)
+ renumber_df["batch_id"] = batch_ids_r
+ else:
+ renumber_df["batch_id"] = None
+
+ if return_offsets:
+ batches_series = cudf.Series(
+ batch_ids,
+ name="batch_id",
+ )
+ if include_hop_column:
+ # TODO remove this logic in release 23.12
+ offsets_df = cudf.Series(
+ label_hop_offsets[cupy.arange(len(batch_ids) + 1) * num_hops],
+ name="offsets",
+ ).to_frame()
+ else:
+ offsets_df = cudf.Series(
+ label_hop_offsets,
+ name="offsets",
+ ).to_frame()
+
+ if len(batches_series) > len(offsets_df):
+ # this is extremely rare so the inefficiency is ok
+ offsets_df = offsets_df.join(batches_series, how="outer").sort_index()
+ else:
+ offsets_df["batch_id"] = batches_series
+
+ if renumber:
+ renumber_offset_series = cudf.Series(
+ cupy_array_dict["renumber_map_offsets"], name="renumber_map_offsets"
+ )
+
+ if len(renumber_offset_series) > len(offsets_df):
+ # this is extremely rare so the inefficiency is ok
+ offsets_df = offsets_df.join(
+ renumber_offset_series, how="outer"
+ ).sort_index()
+ else:
+ offsets_df["renumber_map_offsets"] = renumber_offset_series
+
+ else:
+ if len(batch_ids) > 0:
+ batch_ids_r = cudf.Series(cupy.repeat(batch_ids, num_hops))
+ batch_ids_r = cudf.Series(batch_ids_r).repeat(
+ cupy.diff(label_hop_offsets)
+ )
+ batch_ids_r.reset_index(drop=True, inplace=True)
+
+ results_df["batch_id"] = batch_ids_r
+ else:
+ results_df["batch_id"] = None
+
+ # TODO remove this logic in 23.12; hops will always be returned as offsets
+ if include_hop_column:
+ if len(batch_ids) > 0:
+ hop_ids_r = cudf.Series(cupy.arange(num_hops))
+ hop_ids_r = cudf.concat([hop_ids_r] * len(batch_ids), ignore_index=True)
+
+ # generate the hop column
+ hop_ids_r = (
+ cudf.Series(hop_ids_r, name="hop_id")
+ .repeat(cupy.diff(label_hop_offsets))
+ .reset_index(drop=True)
+ )
+ else:
+ hop_ids_r = cudf.Series(name="hop_id", dtype="int32")
+
+ results_df = results_df.join(hop_ids_r, how="outer").sort_index()
+
+ if major_col_name not in results_df:
+ if use_legacy_names:
+ raise ValueError("Can't use legacy names with major offsets")
+
+ major_offsets_series = cudf.Series(
+ cupy_array_dict["major_offsets"], name="major_offsets"
+ )
+ if len(major_offsets_series) > len(results_df):
+ # this is extremely rare so the inefficiency is ok
+ results_df = results_df.join(
+ major_offsets_series, how="outer"
+ ).sort_index()
+ else:
+ results_df["major_offsets"] = major_offsets_series
+
+ else:
+ # TODO this is deprecated, remove it in 23.12
+
+ results_df[major_col_name] = cupy_array_dict["sources"]
+ results_df[minor_col_name] = cupy_array_dict["destinations"]
+ indices = cupy_array_dict["indices"]
+
+ if indices is None:
+ results_df["indices"] = None
+ else:
+ results_df["indices"] = indices
+ if weight_t == "int32":
+ results_df["indices"] = indices.astype("int32")
+ elif weight_t == "int64":
+ results_df["indices"] = indices.astype("int64")
+ else:
+ results_df["indices"] = indices
+
+ if return_offsets:
+ if renumber:
+ return results_df, offsets_df, renumber_df
+ else:
+ return results_df, offsets_df
+
+ if renumber:
+ return results_df, renumber_df
+
+ return (results_df,)
diff --git a/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py b/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py
index 219854bb002..1832585c0ab 100644
--- a/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py
+++ b/python/cugraph/cugraph/sampling/uniform_neighbor_sample.py
@@ -16,6 +16,8 @@
from pylibcugraph import ResourceHandle
from pylibcugraph import uniform_neighbor_sample as pylibcugraph_uniform_neighbor_sample
+from cugraph.sampling.sampling_utilities import sampling_results_from_cupy_array_dict
+
import numpy
import cudf
@@ -58,15 +60,20 @@ def uniform_neighbor_sample(
G: Graph,
start_list: Sequence,
fanout_vals: List[int],
+ *,
with_replacement: bool = True,
with_edge_properties: bool = False, # deprecated
with_batch_ids: bool = False,
random_state: int = None,
return_offsets: bool = False,
return_hops: bool = True,
+ include_hop_column: bool = True, # deprecated
prior_sources_behavior: str = None,
deduplicate_sources: bool = False,
renumber: bool = False,
+ use_legacy_names: bool = True, # deprecated
+ compress_per_hop: bool = False,
+ compression: str = "COO",
) -> Union[cudf.DataFrame, Tuple[cudf.DataFrame, cudf.DataFrame]]:
"""
Does neighborhood sampling, which samples nodes from a graph based on the
@@ -111,6 +118,12 @@ def uniform_neighbor_sample(
corresponding to the hop where the edge appeared.
Defaults to True.
+ include_hop_column: bool, optional (default=True)
+ Deprecated. Defaults to True.
+ If True, will include the hop column even if
+ return_offsets is True. This option will
+ be removed in release 23.12.
+
prior_sources_behavior: str, optional (default=None)
Options are "carryover", and "exclude".
Default will leave the source list as-is.
@@ -129,6 +142,21 @@ def uniform_neighbor_sample(
will return the renumber map and renumber map offsets
as an additional dataframe.
+ use_legacy_names: bool, optional (default=True)
+ Whether to use the legacy column names (sources, destinations).
+ If True, will use "sources" and "destinations" as the column names.
+ If False, will use "majors" and "minors" as the column names.
+ Deprecated. Will be removed in release 23.12 in favor of always
+ using the new names "majors" and "minors".
+
+ compress_per_hop: bool, optional (default=False)
+ Whether to compress globally (default), or to produce a separate
+ compressed edgelist per hop.
+
+ compression: str, optional (default=COO)
+ Sets the compression type for the output minibatches.
+ Valid options are COO (default), CSR, CSC, DCSR, and DCSC.
+
Returns
-------
result : cudf.DataFrame or Tuple[cudf.DataFrame, cudf.DataFrame]
@@ -193,12 +221,62 @@ def uniform_neighbor_sample(
Contains the batch offsets for the renumber maps
"""
+ if use_legacy_names:
+ major_col_name = "sources"
+ minor_col_name = "destinations"
+ warning_msg = (
+ "The legacy column names (sources, destinations)"
+ " will no longer be supported for uniform_neighbor_sample"
+ " in release 23.12. The use_legacy_names=False option will"
+ " become the only option, and (majors, minors) will be the"
+ " only supported column names."
+ )
+ warnings.warn(warning_msg, FutureWarning)
+ else:
+ major_col_name = "majors"
+ minor_col_name = "minors"
+
+ if compression not in ["COO", "CSR", "CSC", "DCSR", "DCSC"]:
+ raise ValueError("compression must be one of COO, CSR, CSC, DCSR, or DCSC")
+
+ if (
+ (compression != "COO")
+ and (not compress_per_hop)
+ and prior_sources_behavior != "exclude"
+ ):
+ raise ValueError(
+ "hop-agnostic compression is only supported with"
+ " the exclude prior sources behavior due to limitations "
+ "of the libcugraph C++ API"
+ )
+
+ if compress_per_hop and prior_sources_behavior != "carryover":
+ raise ValueError(
+ "Compressing the edgelist per hop is only supported "
+ "with the carryover prior sources behavior due to limitations"
+ " of the libcugraph C++ API"
+ )
+
+ if include_hop_column:
+ warning_msg = (
+ "The include_hop_column flag is deprecated and will be"
+ " removed in the next release in favor of always "
+ "excluding the hop column when return_offsets is True"
+ )
+ warnings.warn(warning_msg, FutureWarning)
+
+ if compression != "COO":
+ raise ValueError(
+ "Including the hop id column is only supported with COO compression."
+ )
+
if with_edge_properties:
warning_msg = (
"The with_edge_properties flag is deprecated"
- " and will be removed in the next release."
+ " and will be removed in the next release in favor"
+ " of returning all properties in the graph"
)
- warnings.warn(warning_msg, DeprecationWarning)
+ warnings.warn(warning_msg, FutureWarning)
if isinstance(start_list, int):
start_list = [start_list]
@@ -255,7 +333,7 @@ def uniform_neighbor_sample(
start_list = G.lookup_internal_vertex_id(start_list, columns)
start_list = start_list.rename(columns={columns[0]: start_col_name})
- sampling_result = pylibcugraph_uniform_neighbor_sample(
+ sampling_result_array_dict = pylibcugraph_uniform_neighbor_sample(
resource_handle=ResourceHandle(),
input_graph=G._plc_graph,
start_list=start_list[start_col_name],
@@ -271,104 +349,27 @@ def uniform_neighbor_sample(
deduplicate_sources=deduplicate_sources,
return_hops=return_hops,
renumber=renumber,
+ compression=compression,
+ compress_per_hop=compress_per_hop,
+ return_dict=True,
)
- df = cudf.DataFrame()
-
- if with_edge_properties:
- # TODO use a dictionary at PLC w/o breaking users
- if renumber:
- (
- sources,
- destinations,
- weights,
- edge_ids,
- edge_types,
- batch_ids,
- offsets,
- hop_ids,
- renumber_map,
- renumber_map_offsets,
- ) = sampling_result
- else:
- (
- sources,
- destinations,
- weights,
- edge_ids,
- edge_types,
- batch_ids,
- offsets,
- hop_ids,
- ) = sampling_result
-
- df["sources"] = sources
- df["destinations"] = destinations
- df["weight"] = weights
- df["edge_id"] = edge_ids
- df["edge_type"] = edge_types
- df["hop_id"] = hop_ids
-
- if renumber:
- renumber_df = cudf.DataFrame(
- {
- "map": renumber_map,
- }
- )
-
- if not return_offsets:
- batch_ids_r = cudf.Series(batch_ids).repeat(
- cp.diff(renumber_map_offsets)
- )
- batch_ids_r.reset_index(drop=True, inplace=True)
- renumber_df["batch_id"] = batch_ids_r
-
- if return_offsets:
- offsets_df = cudf.DataFrame(
- {
- "batch_id": batch_ids,
- "offsets": offsets[:-1],
- }
- )
-
- if renumber:
- offsets_df["renumber_map_offsets"] = renumber_map_offsets[:-1]
-
- else:
- if len(batch_ids) > 0:
- batch_ids = cudf.Series(batch_ids).repeat(cp.diff(offsets))
- batch_ids.reset_index(drop=True, inplace=True)
-
- df["batch_id"] = batch_ids
-
- else:
- sources, destinations, indices = sampling_result
-
- df["sources"] = sources
- df["destinations"] = destinations
-
- if indices is None:
- df["indices"] = None
- else:
- df["indices"] = indices
- if weight_t == "int32":
- df["indices"] = indices.astype("int32")
- elif weight_t == "int64":
- df["indices"] = indices.astype("int64")
- else:
- df["indices"] = indices
+ dfs = sampling_results_from_cupy_array_dict(
+ sampling_result_array_dict,
+ weight_t,
+ len(fanout_vals),
+ with_edge_properties=with_edge_properties,
+ return_offsets=return_offsets,
+ renumber=renumber,
+ use_legacy_names=use_legacy_names,
+ include_hop_column=include_hop_column,
+ )
if G.renumbered and not renumber:
- df = G.unrenumber(df, "sources", preserve_order=True)
- df = G.unrenumber(df, "destinations", preserve_order=True)
-
- if return_offsets:
- if renumber:
- return df, offsets_df, renumber_df
- else:
- return df, offsets_df
+ dfs[0] = G.unrenumber(dfs[0], major_col_name, preserve_order=True)
+ dfs[0] = G.unrenumber(dfs[0], minor_col_name, preserve_order=True)
- if renumber:
- return df, renumber_df
+ if len(dfs) > 1:
+ return dfs
- return df
+ return dfs[0]
diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py
index 0586d0d853c..fa94fa67625 100644
--- a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py
+++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py
@@ -181,7 +181,6 @@ def __from_edgelist(
workers = _client.scheduler_info()["workers"]
# Repartition to 2 partitions per GPU for memory efficient process
input_ddf = input_ddf.repartition(npartitions=len(workers) * 2)
- # FIXME: Make a copy of the input ddf before implicitly altering it.
input_ddf = input_ddf.map_partitions(lambda df: df.copy())
# The dataframe will be symmetrized iff the graph is undirected
# otherwise, the inital dataframe will be returned
@@ -334,7 +333,7 @@ def __from_edgelist(
)
for w, edata in ddf.items()
}
- del ddf
+ # FIXME: For now, don't delete the copied dataframe to avoid crash
self._plc_graph = {
w: _client.compute(delayed_task, workers=w, allow_other_workers=False)
for w, delayed_task in delayed_tasks_d.items()
@@ -1193,7 +1192,5 @@ def _get_column_from_ls_dfs(lst_df, col_name):
if len_df == 0:
return lst_df[0][col_name]
output_col = cudf.concat([df[col_name] for df in lst_df], ignore_index=True)
- for df in lst_df:
- df.drop(columns=[col_name], inplace=True)
- gc.collect()
+ # FIXME: For now, don't delete the copied dataframe to avoid crash
return output_col
diff --git a/python/cugraph/cugraph/tests/community/test_louvain.py b/python/cugraph/cugraph/tests/community/test_louvain.py
index 183be071a44..5441998fb46 100644
--- a/python/cugraph/cugraph/tests/community/test_louvain.py
+++ b/python/cugraph/cugraph/tests/community/test_louvain.py
@@ -142,3 +142,19 @@ def test_louvain_csr_graph(is_weighted):
assert len(parition_diffs) == 0
assert mod_csr == mod_coo
+
+
+@pytest.mark.sg
+def test_louvain_nx_graph_with_isolated_nodes():
+ # Cluster IDs are expected to be unique if all nodes are isolated
+ G = nx.Graph()
+ G.add_nodes_from(range(5))
+ result, _ = cugraph.louvain(G)
+ assert set(result.keys()) == set(G.nodes)
+ assert len(set(result.values())) == G.number_of_nodes()
+
+ # A graph with 5 nodes, where 3 of the nodes are isolated
+ G.add_edge(1, 2)
+ result, _ = cugraph.louvain(G)
+ assert set(result.keys()) == set(G.nodes)
+ assert len(set(result.values())) == G.number_of_nodes() - 1
diff --git a/python/cugraph/cugraph/tests/data_store/test_gnn_feat_storage.py b/python/cugraph/cugraph/tests/data_store/test_gnn_feat_storage.py
index 2d1537d11e3..2b0ec4b11d0 100644
--- a/python/cugraph/cugraph/tests/data_store/test_gnn_feat_storage.py
+++ b/python/cugraph/cugraph/tests/data_store/test_gnn_feat_storage.py
@@ -10,7 +10,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-# Import FeatureStore class
import pytest
import numpy as np
diff --git a/python/cugraph/cugraph/tests/data_store/test_gnn_feat_storage_wholegraph.py b/python/cugraph/cugraph/tests/data_store/test_gnn_feat_storage_wholegraph.py
new file mode 100644
index 00000000000..1892e8a85a6
--- /dev/null
+++ b/python/cugraph/cugraph/tests/data_store/test_gnn_feat_storage_wholegraph.py
@@ -0,0 +1,85 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+import numpy as np
+
+from cugraph.gnn import FeatureStore
+
+from cugraph.utilities.utils import import_optional, MissingModule
+
+pylibwholegraph = import_optional("pylibwholegraph")
+wmb = import_optional("pylibwholegraph.binding.wholememory_binding")
+torch = import_optional("torch")
+
+
+def runtest(world_rank: int, world_size: int):
+ from pylibwholegraph.torch.initialize import init_torch_env_and_create_wm_comm
+
+ wm_comm, _ = init_torch_env_and_create_wm_comm(
+ world_rank,
+ world_size,
+ world_rank,
+ world_size,
+ )
+ wm_comm = wm_comm.wmb_comm
+
+ generator = np.random.default_rng(62)
+ arr = (
+ generator.integers(low=0, high=100, size=100_000)
+ .reshape(10_000, -1)
+ .astype("float64")
+ )
+
+ fs = FeatureStore(backend="wholegraph")
+ fs.add_data(arr, "type2", "feat1")
+ wm_comm.barrier()
+
+ indices_to_fetch = np.random.randint(low=0, high=len(arr), size=1024)
+ output_fs = fs.get_data(indices_to_fetch, type_name="type2", feat_name="feat1")
+ assert isinstance(output_fs, torch.Tensor)
+ assert output_fs.is_cuda
+ expected = arr[indices_to_fetch]
+ np.testing.assert_array_equal(output_fs.cpu().numpy(), expected)
+
+ wmb.finalize()
+
+
+@pytest.mark.sg
+@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available")
+@pytest.mark.skipif(
+ isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available"
+)
+def test_feature_storage_wholegraph_backend():
+ from pylibwholegraph.utils.multiprocess import multiprocess_run
+
+ gpu_count = wmb.fork_get_gpu_count()
+ print("gpu count:", gpu_count)
+ assert gpu_count > 0
+
+ multiprocess_run(1, runtest)
+
+
+@pytest.mark.mg
+@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available")
+@pytest.mark.skipif(
+ isinstance(pylibwholegraph, MissingModule), reason="wholegraph not available"
+)
+def test_feature_storage_wholegraph_backend_mg():
+ from pylibwholegraph.utils.multiprocess import multiprocess_run
+
+ gpu_count = wmb.fork_get_gpu_count()
+ print("gpu count:", gpu_count)
+ assert gpu_count > 0
+
+ multiprocess_run(gpu_count, runtest)
diff --git a/python/cugraph/cugraph/tests/layout/test_force_atlas2.py b/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
index 495a2d945c0..6b1fd6bcc4e 100644
--- a/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
+++ b/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
@@ -13,13 +13,46 @@
import time
import pytest
-import scipy.io
-from sklearn.manifold import trustworthiness
-import cudf
import cugraph
+from cugraph.structure import number_map
from cugraph.internals import GraphBasedDimRedCallback
-from cugraph.datasets import karate, polbooks, dolphins, netscience
+from sklearn.manifold import trustworthiness
+import scipy.io
+from cugraph.datasets import (
+ karate,
+ polbooks,
+ dolphins,
+ netscience,
+ dining_prefs,
+)
+
+# FIXME: The multi-column pos_list test was removed because it is
+# non-deterministic; its coverage needs to be replaced (cuGraph issue 3890).
+
+# This function renumbers a dataframe so it can be tested using trustworthiness.
+# It converts a dataframe with string vertex ids to a renumbered integer one.
+
+
+def renumbered_edgelist(df):
+ renumbered_df, num_map = number_map.NumberMap.renumber(df, "src", "dst")
+ new_df = renumbered_df[["renumbered_src", "renumbered_dst", "wgt"]]
+ column_names = {"renumbered_src": "src", "renumbered_dst": "dst"}
+ new_df = new_df.rename(columns=column_names)
+ return new_df
+
+
+# This function converts a dataframe to the sparse matrix that is required by
+# sklearn's trustworthiness to verify the layout
+def get_coo_array(edgelist):
+ coo = edgelist
+ x = max(coo["src"].max(), coo["dst"].max()) + 1
+ row = coo["src"].to_numpy()
+ col = coo["dst"].to_numpy()
+ data = coo["wgt"].to_numpy()
+ M = scipy.sparse.coo_array((data, (row, col)), shape=(x, x))
+
+ return M
def cugraph_call(
@@ -37,11 +70,15 @@ def cugraph_call(
strong_gravity_mode,
gravity,
callback=None,
+ renumber=False,
):
-
G = cugraph.Graph()
+ if cu_M["src"] is not int or cu_M["dst"] is not int:
+ renumber = True
+ else:
+ renumber = False
G.from_cudf_edgelist(
- cu_M, source="src", destination="dst", edge_attr="wgt", renumber=False
+ cu_M, source="src", destination="dst", edge_attr="wgt", renumber=renumber
)
t1 = time.time()
@@ -66,7 +103,19 @@ def cugraph_call(
return pos
-DATASETS = [(karate, 0.70), (polbooks, 0.75), (dolphins, 0.66), (netscience, 0.66)]
+DATASETS = [
+ (karate, 0.70),
+ (polbooks, 0.75),
+ (dolphins, 0.66),
+ (netscience, 0.66),
+ (dining_prefs, 0.50),
+]
+
+DATASETS2 = [
+ (polbooks, 0.75),
+ (dolphins, 0.66),
+ (netscience, 0.66),
+]
MAX_ITERATIONS = [500]
@@ -95,8 +144,7 @@ def on_train_end(self, positions):
@pytest.mark.parametrize("max_iter", MAX_ITERATIONS)
@pytest.mark.parametrize("barnes_hut_optimize", BARNES_HUT_OPTIMIZE)
def test_force_atlas2(graph_file, score, max_iter, barnes_hut_optimize):
- cu_M = graph_file.get_edgelist()
- dataset_path = graph_file.get_path()
+ cu_M = graph_file.get_edgelist(download=True)
test_callback = TestCallback()
cu_pos = cugraph_call(
cu_M,
@@ -126,9 +174,11 @@ def test_force_atlas2(graph_file, score, max_iter, barnes_hut_optimize):
iterations on a given graph.
"""
- matrix_file = dataset_path.with_suffix(".mtx")
- M = scipy.io.mmread(matrix_file)
- M = M.toarray()
+ if "string" in graph_file.metadata["col_types"]:
+ df = renumbered_edgelist(graph_file.get_edgelist(download=True))
+ M = get_coo_array(df)
+ else:
+ M = get_coo_array(graph_file.get_edgelist(download=True))
cu_trust = trustworthiness(M, cu_pos[["x", "y"]].to_pandas())
print(cu_trust, score)
assert cu_trust > score
@@ -138,74 +188,3 @@ def test_force_atlas2(graph_file, score, max_iter, barnes_hut_optimize):
assert test_callback.on_epoch_end_called_count == max_iter
# verify `on_train_end` was only called once
assert test_callback.on_train_end_called_count == 1
-
-
-# FIXME: this test occasionally fails - skipping to prevent CI failures but
-# need to revisit ASAP
-@pytest.mark.sg
-@pytest.mark.skip(reason="non-deterministric - needs fixing!")
-@pytest.mark.parametrize("graph_file, score", DATASETS[:-1])
-@pytest.mark.parametrize("max_iter", MAX_ITERATIONS)
-@pytest.mark.parametrize("barnes_hut_optimize", BARNES_HUT_OPTIMIZE)
-def test_force_atlas2_multi_column_pos_list(
- graph_file, score, max_iter, barnes_hut_optimize
-):
- cu_M = graph_file.get_edgelist()
- dataset_path = graph_file.get_path()
- test_callback = TestCallback()
- pos = cugraph_call(
- cu_M,
- max_iter=max_iter,
- pos_list=None,
- outbound_attraction_distribution=True,
- lin_log_mode=False,
- prevent_overlapping=False,
- edge_weight_influence=1.0,
- jitter_tolerance=1.0,
- barnes_hut_optimize=False,
- barnes_hut_theta=0.5,
- scaling_ratio=2.0,
- strong_gravity_mode=False,
- gravity=1.0,
- callback=test_callback,
- )
-
- cu_M.rename(columns={"0": "src_0", "1": "dst_0"}, inplace=True)
- cu_M["src_1"] = cu_M["src_0"] + 1000
- cu_M["dst_1"] = cu_M["dst_0"] + 1000
-
- G = cugraph.Graph()
- G.from_cudf_edgelist(
- cu_M, source=["src_0", "src_1"], destination=["dst_0", "dst_1"], edge_attr="2"
- )
-
- pos_list = cudf.DataFrame()
- pos_list["vertex_0"] = pos["vertex"]
- pos_list["vertex_1"] = pos_list["vertex_0"] + 1000
- pos_list["x"] = pos["x"]
- pos_list["y"] = pos["y"]
-
- cu_pos = cugraph.force_atlas2(
- G,
- max_iter=max_iter,
- pos_list=pos_list,
- outbound_attraction_distribution=True,
- lin_log_mode=False,
- prevent_overlapping=False,
- edge_weight_influence=1.0,
- jitter_tolerance=1.0,
- barnes_hut_optimize=False,
- barnes_hut_theta=0.5,
- scaling_ratio=2.0,
- strong_gravity_mode=False,
- gravity=1.0,
- callback=test_callback,
- )
-
- cu_pos = cu_pos.sort_values("0_vertex")
- matrix_file = dataset_path.with_suffix(".mtx")
- M = scipy.io.mmread(matrix_file)
- M = M.todense()
- cu_trust = trustworthiness(M, cu_pos[["x", "y"]].to_pandas())
- print(cu_trust, score)
- assert cu_trust > score
diff --git a/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py b/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py
index b56a6baae2b..ee739c9f236 100644
--- a/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py
+++ b/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py
@@ -34,6 +34,7 @@ def setup_function():
IS_DIRECTED = [False]
HAS_VERTEX_PAIR = [True, False]
+IS_WEIGHTED = [True, False]
# =============================================================================
@@ -48,6 +49,7 @@ def setup_function():
(datasets, "graph_file"),
(IS_DIRECTED, "directed"),
(HAS_VERTEX_PAIR, "has_vertex_pair"),
+ (IS_WEIGHTED, "is_weighted"),
)
@@ -57,7 +59,9 @@ def input_combo(request):
Simply return the current combination of params as a dictionary for use in
tests or other parameterized fixtures.
"""
- parameters = dict(zip(("graph_file", "directed", "has_vertex_pair"), request.param))
+ parameters = dict(
+ zip(("graph_file", "directed", "has_vertex_pair", "is_weighted"), request.param)
+ )
return parameters
@@ -72,7 +76,10 @@ def input_expected_output(input_combo):
input_data_path = input_combo["graph_file"]
directed = input_combo["directed"]
has_vertex_pair = input_combo["has_vertex_pair"]
- G = utils.generate_cugraph_graph_from_file(input_data_path, directed=directed)
+ is_weighted = input_combo["is_weighted"]
+ G = utils.generate_cugraph_graph_from_file(
+ input_data_path, directed=directed, edgevals=is_weighted
+ )
if has_vertex_pair:
# Sample random vertices from the graph and compute the two_hop_neighbors
# with those seeds
@@ -84,7 +91,9 @@ def input_expected_output(input_combo):
vertex_pair = None
input_combo["vertex_pair"] = vertex_pair
- sg_cugraph_jaccard = cugraph.experimental.jaccard(G, input_combo["vertex_pair"])
+ sg_cugraph_jaccard = cugraph.jaccard(
+ G, input_combo["vertex_pair"], use_weight=is_weighted
+ )
# Save the results back to the input_combo dictionary to prevent redundant
# cuGraph runs. Other tests using the input_combo fixture will look for
# them, and if not present they will have to re-run the same cuGraph call.
@@ -104,6 +113,7 @@ def input_expected_output(input_combo):
ddf,
source="src",
destination="dst",
+ edge_attr="value" if is_weighted else None,
renumber=True,
store_transposed=True,
)
@@ -122,8 +132,11 @@ def input_expected_output(input_combo):
def test_dask_mg_jaccard(dask_client, benchmark, input_expected_output):
dg = input_expected_output["MGGraph"]
+ use_weight = input_expected_output["is_weighted"]
- result_jaccard = benchmark(dcg.jaccard, dg, input_expected_output["vertex_pair"])
+ result_jaccard = benchmark(
+ dcg.jaccard, dg, input_expected_output["vertex_pair"], use_weight=use_weight
+ )
result_jaccard = (
result_jaccard.compute()
@@ -151,41 +164,3 @@ def test_dask_mg_jaccard(dask_client, benchmark, input_expected_output):
assert len(jaccard_coeff_diffs1) == 0
assert len(jaccard_coeff_diffs2) == 0
-
-
-@pytest.mark.mg
-def test_dask_mg_weighted_jaccard(dask_client):
- input_data_path = datasets[0]
- chunksize = dcg.get_chunksize(input_data_path)
- ddf = dask_cudf.read_csv(
- input_data_path,
- chunksize=chunksize,
- delimiter=" ",
- names=["src", "dst", "value"],
- dtype=["int32", "int32", "float32"],
- )
-
- dg = cugraph.Graph(directed=False)
- dg.from_dask_cudf_edgelist(
- ddf,
- source="src",
- destination="dst",
- edge_attr="value",
- renumber=True,
- store_transposed=True,
- )
- with pytest.raises(ValueError):
- dcg.jaccard(dg)
-
- dg = cugraph.Graph(directed=False)
- dg.from_dask_cudf_edgelist(
- ddf,
- source="src",
- destination="dst",
- edge_attr="value",
- store_transposed=True,
- )
-
- use_weight = True
- with pytest.raises(ValueError):
- dcg.jaccard(dg, use_weight=use_weight)
diff --git a/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py b/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py
index ce4bf619f47..87407d7b59c 100644
--- a/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py
+++ b/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py
@@ -34,6 +34,7 @@ def setup_function():
IS_DIRECTED = [False]
HAS_VERTEX_PAIR = [True, False]
+IS_WEIGHTED = [True, False]
# =============================================================================
@@ -48,6 +49,7 @@ def setup_function():
(datasets, "graph_file"),
(IS_DIRECTED, "directed"),
(HAS_VERTEX_PAIR, "has_vertex_pair"),
+ (IS_WEIGHTED, "is_weighted"),
)
@@ -57,7 +59,9 @@ def input_combo(request):
Simply return the current combination of params as a dictionary for use in
tests or other parameterized fixtures.
"""
- parameters = dict(zip(("graph_file", "directed", "has_vertex_pair"), request.param))
+ parameters = dict(
+ zip(("graph_file", "directed", "has_vertex_pair", "is_weighted"), request.param)
+ )
return parameters
@@ -72,7 +76,10 @@ def input_expected_output(input_combo):
input_data_path = input_combo["graph_file"]
directed = input_combo["directed"]
has_vertex_pair = input_combo["has_vertex_pair"]
- G = utils.generate_cugraph_graph_from_file(input_data_path, directed=directed)
+ is_weighted = input_combo["is_weighted"]
+ G = utils.generate_cugraph_graph_from_file(
+ input_data_path, directed=directed, edgevals=is_weighted
+ )
if has_vertex_pair:
# Sample random vertices from the graph and compute the two_hop_neighbors
# with those seeds
@@ -84,7 +91,9 @@ def input_expected_output(input_combo):
vertex_pair = None
input_combo["vertex_pair"] = vertex_pair
- sg_cugraph_overlap = cugraph.experimental.overlap(G, input_combo["vertex_pair"])
+ sg_cugraph_overlap = cugraph.overlap(
+ G, input_combo["vertex_pair"], use_weight=is_weighted
+ )
# Save the results back to the input_combo dictionary to prevent redundant
# cuGraph runs. Other tests using the input_combo fixture will look for
# them, and if not present they will have to re-run the same cuGraph call.
@@ -104,6 +113,7 @@ def input_expected_output(input_combo):
ddf,
source="src",
destination="dst",
+ edge_attr="value" if is_weighted else None,
renumber=True,
store_transposed=True,
)
@@ -125,8 +135,11 @@ def input_expected_output(input_combo):
def test_dask_mg_overlap(dask_client, benchmark, input_expected_output):
dg = input_expected_output["MGGraph"]
+ use_weight = input_expected_output["is_weighted"]
- result_overlap = benchmark(dcg.overlap, dg, input_expected_output["vertex_pair"])
+ result_overlap = benchmark(
+ dcg.overlap, dg, input_expected_output["vertex_pair"], use_weight=use_weight
+ )
result_overlap = (
result_overlap.compute()
@@ -154,41 +167,3 @@ def test_dask_mg_overlap(dask_client, benchmark, input_expected_output):
assert len(overlap_coeff_diffs1) == 0
assert len(overlap_coeff_diffs2) == 0
-
-
-@pytest.mark.mg
-def test_dask_mg_weighted_overlap():
- input_data_path = datasets[0]
- chunksize = dcg.get_chunksize(input_data_path)
- ddf = dask_cudf.read_csv(
- input_data_path,
- chunksize=chunksize,
- delimiter=" ",
- names=["src", "dst", "value"],
- dtype=["int32", "int32", "float32"],
- )
-
- dg = cugraph.Graph(directed=False)
- dg.from_dask_cudf_edgelist(
- ddf,
- source="src",
- destination="dst",
- edge_attr="value",
- renumber=True,
- store_transposed=True,
- )
- with pytest.raises(ValueError):
- dcg.overlap(dg)
-
- dg = cugraph.Graph(directed=False)
- dg.from_dask_cudf_edgelist(
- ddf,
- source="src",
- destination="dst",
- edge_attr="value",
- store_transposed=True,
- )
-
- use_weight = True
- with pytest.raises(ValueError):
- dcg.overlap(dg, use_weight=use_weight)
diff --git a/python/cugraph/cugraph/tests/link_prediction/test_sorensen_mg.py b/python/cugraph/cugraph/tests/link_prediction/test_sorensen_mg.py
index af6b60771a0..66832d08427 100644
--- a/python/cugraph/cugraph/tests/link_prediction/test_sorensen_mg.py
+++ b/python/cugraph/cugraph/tests/link_prediction/test_sorensen_mg.py
@@ -35,6 +35,7 @@ def setup_function():
IS_DIRECTED = [False]
HAS_VERTEX_PAIR = [True, False]
+IS_WEIGHTED = [True, False]
# =============================================================================
@@ -49,6 +50,7 @@ def setup_function():
(datasets, "graph_file"),
(IS_DIRECTED, "directed"),
(HAS_VERTEX_PAIR, "has_vertex_pair"),
+ (IS_WEIGHTED, "is_weighted"),
)
@@ -58,7 +60,9 @@ def input_combo(request):
Simply return the current combination of params as a dictionary for use in
tests or other parameterized fixtures.
"""
- parameters = dict(zip(("graph_file", "directed", "has_vertex_pair"), request.param))
+ parameters = dict(
+ zip(("graph_file", "directed", "has_vertex_pair", "is_weighted"), request.param)
+ )
return parameters
@@ -73,7 +77,10 @@ def input_expected_output(input_combo):
input_data_path = input_combo["graph_file"]
directed = input_combo["directed"]
has_vertex_pair = input_combo["has_vertex_pair"]
- G = utils.generate_cugraph_graph_from_file(input_data_path, directed=directed)
+ is_weighted = input_combo["is_weighted"]
+ G = utils.generate_cugraph_graph_from_file(
+ input_data_path, directed=directed, edgevals=is_weighted
+ )
if has_vertex_pair:
# Sample random vertices from the graph and compute the two_hop_neighbors
# with those seeds
@@ -85,7 +92,9 @@ def input_expected_output(input_combo):
vertex_pair = None
input_combo["vertex_pair"] = vertex_pair
- sg_cugraph_sorensen = cugraph.experimental.sorensen(G, input_combo["vertex_pair"])
+ sg_cugraph_sorensen = cugraph.sorensen(
+ G, input_combo["vertex_pair"], use_weight=is_weighted
+ )
# Save the results back to the input_combo dictionary to prevent redundant
# cuGraph runs. Other tests using the input_combo fixture will look for
# them, and if not present they will have to re-run the same cuGraph call.
@@ -105,6 +114,7 @@ def input_expected_output(input_combo):
ddf,
source="src",
destination="dst",
+ edge_attr="value" if is_weighted else None,
renumber=True,
store_transposed=True,
)
@@ -124,8 +134,11 @@ def input_expected_output(input_combo):
def test_dask_mg_sorensen(dask_client, benchmark, input_expected_output):
dg = input_expected_output["MGGraph"]
+ use_weight = input_expected_output["is_weighted"]
- result_sorensen = benchmark(dcg.sorensen, dg, input_expected_output["vertex_pair"])
+ result_sorensen = benchmark(
+ dcg.sorensen, dg, input_expected_output["vertex_pair"], use_weight=use_weight
+ )
result_sorensen = (
result_sorensen.compute()
@@ -153,41 +166,3 @@ def test_dask_mg_sorensen(dask_client, benchmark, input_expected_output):
assert len(sorensen_coeff_diffs1) == 0
assert len(sorensen_coeff_diffs2) == 0
-
-
-@pytest.mark.mg
-def test_dask_mg_weighted_sorensen(dask_client):
- input_data_path = datasets[0]
- chunksize = dcg.get_chunksize(input_data_path)
- ddf = dask_cudf.read_csv(
- input_data_path,
- chunksize=chunksize,
- delimiter=" ",
- names=["src", "dst", "value"],
- dtype=["int32", "int32", "float32"],
- )
-
- dg = cugraph.Graph(directed=False)
- dg.from_dask_cudf_edgelist(
- ddf,
- source="src",
- destination="dst",
- edge_attr="value",
- renumber=True,
- store_transposed=True,
- )
- with pytest.raises(ValueError):
- dcg.sorensen(dg)
-
- dg = cugraph.Graph(directed=False)
- dg.from_dask_cudf_edgelist(
- ddf,
- source="src",
- destination="dst",
- edge_attr="value",
- store_transposed=True,
- )
-
- use_weight = True
- with pytest.raises(ValueError):
- dcg.sorensen(dg, use_weight=use_weight)
diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py
index 5ea79e0893a..a945881394b 100644
--- a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py
+++ b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py
@@ -16,7 +16,7 @@
import cudf
import cupy
import cugraph
-from cugraph.datasets import karate
+from cugraph.datasets import karate, email_Eu_core
from cugraph.experimental.gnn import BulkSampler
from cugraph.utilities.utils import create_directory_with_overwrite
@@ -297,3 +297,53 @@ def test_bulk_sampler_empty_batches(scratch_dir):
assert df.batch_id.max() == 1
shutil.rmtree(samples_path)
+
+
+@pytest.mark.sg
+def test_bulk_sampler_csr(scratch_dir):
+ el = email_Eu_core.get_edgelist()
+
+ G = cugraph.Graph(directed=True)
+ G.from_cudf_edgelist(el, source="src", destination="dst")
+
+ samples_path = os.path.join(scratch_dir, "test_bulk_sampler_csr")
+ create_directory_with_overwrite(samples_path)
+
+ bs = BulkSampler(
+ batch_size=7,
+ output_path=samples_path,
+ graph=G,
+ fanout_vals=[5, 4, 3],
+ with_replacement=False,
+ batches_per_partition=7,
+ renumber=True,
+ use_legacy_names=False,
+ compression="CSR",
+ compress_per_hop=False,
+ prior_sources_behavior="exclude",
+ include_hop_column=False,
+ )
+
+ seeds = G.select_random_vertices(62, 1000)
+ batch_ids = cudf.Series(
+ cupy.repeat(cupy.arange(int(1000 / 7) + 1, dtype="int32"), 7)[:1000]
+ ).sort_values()
+
+ batch_df = cudf.DataFrame(
+ {
+ "seed": seeds,
+ "batch": batch_ids,
+ }
+ )
+
+ bs.add_batches(batch_df, start_col_name="seed", batch_col_name="batch")
+ bs.flush()
+
+ assert len(os.listdir(samples_path)) == 21
+
+ for file in os.listdir(samples_path):
+ df = cudf.read_parquet(os.path.join(samples_path, file))
+
+ assert df.major_offsets.dropna().iloc[-1] - df.major_offsets.iloc[0] == len(df)
+
+ shutil.rmtree(samples_path)
diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io.py
index f71c16a8368..5eafe89ea83 100644
--- a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io.py
+++ b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io.py
@@ -16,6 +16,7 @@
import pytest
+import cupy
import cudf
from cugraph.gnn.data_loading.bulk_sampler_io import write_samples
from cugraph.utilities.utils import create_directory_with_overwrite
@@ -34,7 +35,9 @@ def test_bulk_sampler_io(scratch_dir):
}
)
- offsets = cudf.DataFrame({"offsets": [0, 8], "batch_id": [0, 1]})
+ assert len(results) == 12
+
+ offsets = cudf.DataFrame({"offsets": [0, 8, 12], "batch_id": [0, 1, None]})
samples_path = os.path.join(scratch_dir, "test_bulk_sampler_io")
create_directory_with_overwrite(samples_path)
@@ -138,8 +141,12 @@ def test_bulk_sampler_io_empty_batch(scratch_dir):
}
)
+ assert len(results) == 20
+
# some batches are missing
- offsets = cudf.DataFrame({"offsets": [0, 8, 12, 16], "batch_id": [0, 3, 4, 10]})
+ offsets = cudf.DataFrame(
+ {"offsets": [0, 8, 12, 16, 20], "batch_id": [0, 3, 4, 10, None]}
+ )
samples_path = os.path.join(scratch_dir, "test_bulk_sampler_io_empty_batch")
create_directory_with_overwrite(samples_path)
@@ -157,3 +164,61 @@ def test_bulk_sampler_io_empty_batch(scratch_dir):
df1 = cudf.read_parquet(os.path.join(samples_path, "batch=4-5.parquet"))
assert df1.batch_id.min() == 4
assert df1.batch_id.max() == 5
+
+ shutil.rmtree(samples_path)
+
+
+@pytest.mark.sg
+def test_bulk_sampler_io_mock_csr(scratch_dir):
+    """Round-trip a hand-built CSR-style sample through ``write_samples``.
+
+    A single batch (id 0) is assembled from mock offsets, minors, edge ids and
+    a renumber map, passed to ``write_samples`` and read back from
+    ``batch=0-0.parquet``.  Each checked column, with nulls dropped, must
+    match the series it was built from.
+    """
+    major_offsets_array = cudf.Series([0, 5, 10, 15])
+    minors_array = cudf.Series([1, 2, 3, 4, 8, 9, 1, 3, 4, 5, 3, 0, 4, 9, 1])
+    edge_ids = cudf.Series(cupy.arange(len(minors_array)))
+
+    # 2 hops
+    label_hop_offsets = cudf.Series([0, 1, 3])
+
+    # map
+    renumber_map = cudf.Series(cupy.arange(10))
+    renumber_map_offsets = cudf.Series([0, 10])
+
+    # Columns are assigned one at a time, in this order, because the series
+    # have different lengths.
+    results_df = cudf.DataFrame()
+    for column, values in (
+        ("minors", minors_array),
+        ("major_offsets", major_offsets_array),
+        ("edge_id", edge_ids),
+        ("edge_type", None),
+        ("weight", None),
+    ):
+        results_df[column] = values
+
+    offsets_df = cudf.DataFrame()
+    for column, values in (
+        ("offsets", label_hop_offsets),
+        ("renumber_map_offsets", renumber_map_offsets),
+        ("batch_id", cudf.Series([0])),
+    ):
+        offsets_df[column] = values
+
+    renumber_df = cudf.DataFrame()
+    renumber_df["map"] = renumber_map
+
+    samples_path = os.path.join(scratch_dir, "test_bulk_sampler_io_mock_csr")
+    create_directory_with_overwrite(samples_path)
+
+    write_samples(results_df, offsets_df, renumber_df, 1, samples_path)
+
+    result = cudf.read_parquet(os.path.join(samples_path, "batch=0-0.parquet"))
+
+    expected_columns = (
+        ("minors", minors_array),
+        ("major_offsets", major_offsets_array),
+        ("edge_id", edge_ids),
+        ("renumber_map_offsets", renumber_map_offsets),
+        ("map", renumber_map),
+        ("label_hop_offsets", label_hop_offsets),
+    )
+    for column, expected in expected_columns:
+        # Attribute access is kept (via getattr) to mirror ``result.<column>``.
+        actual = getattr(result, column).dropna().values_host.tolist()
+        assert actual == expected.values_host.tolist()
+
+    shutil.rmtree(samples_path)
diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io_mg.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io_mg.py
index 41f68c08e5c..638cccbdcaa 100644
--- a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io_mg.py
+++ b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io_mg.py
@@ -38,8 +38,12 @@ def test_bulk_sampler_io(scratch_dir):
divisions=[0, 8, 11]
)
- offsets = cudf.DataFrame({"offsets": [0, 0], "batch_id": [0, 1]})
- offsets = dask_cudf.from_cudf(offsets, npartitions=2)
+ assert len(results) == 12
+
+ offsets = cudf.DataFrame({"offsets": [0, 8, 0, 4], "batch_id": [0, None, 1, None]})
+ offsets = dask_cudf.from_cudf(offsets, npartitions=1).repartition(
+ divisions=[0, 2, 3]
+ )
samples_path = os.path.join(scratch_dir, "mg_test_bulk_sampler_io")
create_directory_with_overwrite(samples_path)
@@ -149,9 +153,11 @@ def test_bulk_sampler_io_empty_batch(scratch_dir):
)
# some batches are missing
- offsets = cudf.DataFrame({"offsets": [0, 8, 0, 4], "batch_id": [0, 3, 4, 10]})
+ offsets = cudf.DataFrame(
+ {"offsets": [0, 8, 12, 0, 4, 8], "batch_id": [0, 3, None, 4, 10, None]}
+ )
offsets = dask_cudf.from_cudf(offsets, npartitions=1).repartition(
- divisions=[0, 2, 3]
+ divisions=[0, 3, 5]
)
samples_path = os.path.join(scratch_dir, "mg_test_bulk_sampler_io_empty_batch")
diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py
index eded435f897..aee81e5ffed 100644
--- a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py
+++ b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py
@@ -21,7 +21,7 @@
import cupy
import cugraph
import dask_cudf
-from cugraph.datasets import karate
+from cugraph.datasets import karate, email_Eu_core
from cugraph.experimental import BulkSampler
from cugraph.utilities.utils import create_directory_with_overwrite
@@ -247,3 +247,59 @@ def test_bulk_sampler_empty_batches(dask_client, scratch_dir):
assert df.batch_id.max() == 1
shutil.rmtree(samples_path)
+
+
+@pytest.mark.mg
+@pytest.mark.parametrize("mg_input", [True, False])
+def test_bulk_sampler_csr(dask_client, scratch_dir, mg_input):
+    """Multi-GPU bulk sampling with per-hop CSR compression.
+
+    The email_Eu_core edge list is distributed over twice as many partitions
+    as there are workers.  1000 seeds with batch ids repeated 7 times each
+    are submitted either as a cudf frame or, when ``mg_input`` is true, as a
+    2-partition dask_cudf frame.  The test expects 21 entries in the output
+    directory and, for every file, that the range spanned by
+    ``major_offsets`` equals the number of rows in that file.
+    """
+    num_seeds = 1000
+    seeds_per_batch = 7
+
+    nworkers = len(dask_client.scheduler_info()["workers"])
+    edgelist = dask_cudf.from_cudf(
+        email_Eu_core.get_edgelist(), npartitions=nworkers * 2
+    )
+
+    G = cugraph.Graph(directed=True)
+    G.from_dask_cudf_edgelist(edgelist, source="src", destination="dst")
+
+    samples_path = os.path.join(scratch_dir, "mg_test_bulk_sampler_csr")
+    create_directory_with_overwrite(samples_path)
+
+    sampler = BulkSampler(
+        batch_size=seeds_per_batch,
+        output_path=samples_path,
+        graph=G,
+        fanout_vals=[5, 4, 3],
+        with_replacement=False,
+        batches_per_partition=7,
+        renumber=True,
+        use_legacy_names=False,
+        compression="CSR",
+        compress_per_hop=True,
+        prior_sources_behavior="carryover",
+        deduplicate_sources=True,
+        include_hop_column=False,
+    )
+
+    seeds = G.select_random_vertices(62, num_seeds)
+
+    # One batch id per seed: each id repeated 7 times, truncated to num_seeds.
+    num_batches = int(num_seeds / seeds_per_batch) + 1
+    repeated_ids = cupy.repeat(
+        cupy.arange(num_batches, dtype="int32"), seeds_per_batch
+    )
+    batch_ids = cudf.Series(repeated_ids[:num_seeds]).sort_values()
+
+    batch_df = cudf.DataFrame({"seed": seeds.compute().values, "batch": batch_ids})
+
+    if mg_input:
+        batch_df = dask_cudf.from_cudf(batch_df, npartitions=2)
+
+    sampler.add_batches(batch_df, start_col_name="seed", batch_col_name="batch")
+    sampler.flush()
+
+    written = os.listdir(samples_path)
+    assert len(written) == 21
+
+    for file_name in written:
+        df = cudf.read_parquet(os.path.join(samples_path, file_name))
+
+        first_offset = df.major_offsets.iloc[0]
+        last_offset = df.major_offsets.dropna().iloc[-1]
+        assert last_offset - first_offset == len(df)
+
+    shutil.rmtree(samples_path)
diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py
index 62599291d04..206898088ab 100644
--- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py
+++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py
@@ -15,6 +15,7 @@
import pytest
+import cupy
import cudf
import cugraph
from cugraph import uniform_neighbor_sample
@@ -151,7 +152,7 @@ def test_uniform_neighbor_sample_simple(input_combo):
G,
input_combo["start_list"],
input_combo["fanout_vals"],
- input_combo["with_replacement"],
+ with_replacement=input_combo["with_replacement"],
)
print(input_df)
@@ -254,7 +255,9 @@ def test_uniform_neighbor_sample_tree(directed):
start_list = cudf.Series([0, 0], dtype="int32")
fanout_vals = [4, 1, 3]
with_replacement = True
- result_nbr = uniform_neighbor_sample(G, start_list, fanout_vals, with_replacement)
+ result_nbr = uniform_neighbor_sample(
+ G, start_list, fanout_vals, with_replacement=with_replacement
+ )
result_nbr = result_nbr.drop_duplicates()
@@ -288,7 +291,7 @@ def test_uniform_neighbor_sample_unweighted(simple_unweighted_input_expected_out
test_data["Graph"],
test_data["start_list"].astype("int64"),
test_data["fanout_vals"],
- test_data["with_replacement"],
+ with_replacement=test_data["with_replacement"],
)
actual_src = sampling_results.sources
@@ -303,7 +306,8 @@ def test_uniform_neighbor_sample_unweighted(simple_unweighted_input_expected_out
@pytest.mark.sg
@pytest.mark.cugraph_ops
@pytest.mark.parametrize("return_offsets", [True, False])
-def test_uniform_neighbor_sample_edge_properties(return_offsets):
+@pytest.mark.parametrize("include_hop_column", [True, False])
+def test_uniform_neighbor_sample_edge_properties(return_offsets, include_hop_column):
edgelist_df = cudf.DataFrame(
{
"src": cudf.Series([0, 1, 2, 3, 4, 3, 4, 2, 0, 1, 0, 2], dtype="int32"),
@@ -337,6 +341,7 @@ def test_uniform_neighbor_sample_edge_properties(return_offsets):
with_edge_properties=True,
with_batch_ids=True,
return_offsets=return_offsets,
+ include_hop_column=include_hop_column,
)
if return_offsets:
sampling_results, sampling_offsets = sampling_results
@@ -359,11 +364,29 @@ def test_uniform_neighbor_sample_edge_properties(return_offsets):
== sampling_results["destinations"].values_host.tolist()
)
- assert sampling_results["hop_id"].values_host.tolist() == ([0, 0, 1, 1, 1, 1] * 2)
+ if include_hop_column:
+ assert sampling_results["hop_id"].values_host.tolist() == (
+ [0, 0, 1, 1, 1, 1] * 2
+ )
+ else:
+ assert "hop_id" not in sampling_results
if return_offsets:
- assert sampling_offsets["batch_id"].values_host.tolist() == [0, 1]
- assert sampling_offsets["offsets"].values_host.tolist() == [0, 6]
+ assert sampling_offsets["batch_id"].dropna().values_host.tolist() == [0, 1]
+ if include_hop_column:
+ assert sampling_offsets["offsets"].dropna().values_host.tolist() == [
+ 0,
+ 6,
+ 12,
+ ]
+ else:
+ assert sampling_offsets["offsets"].dropna().values_host.tolist() == [
+ 0,
+ 2,
+ 6,
+ 8,
+ 12,
+ ]
else:
assert sampling_results["batch_id"].values_host.tolist() == ([0] * 6 + [1] * 6)
@@ -778,6 +801,176 @@ def test_uniform_neighbor_sample_renumber(hops):
assert (renumber_map.batch_id == 0).all()
+@pytest.mark.sg
+@pytest.mark.parametrize("hops", [[5], [5, 5], [5, 5, 5]])
+def test_uniform_neighbor_sample_offset_renumber(hops):
+    """Compare renumbered and unrenumbered sampling of the same seeds.
+
+    The email_Eu_core graph is sampled twice with identical arguments and
+    ``random_state=62``, once with ``renumber=False`` and once with
+    ``renumber=True``.  The vertices seen up to each hop in the unrenumbered
+    result are compared against the leading entries of the renumber map, and
+    the returned ``renumber_map_offsets`` are checked to span the whole map.
+    """
+    el = email_Eu_core.get_edgelist()
+
+    G = cugraph.Graph(directed=True)
+    G.from_cudf_edgelist(el, source="src", destination="dst")
+
+    seeds = G.select_random_vertices(62, int(0.0001 * len(el)))
+
+    # Reference run: original vertex ids.
+    (
+        sampling_results_unrenumbered,
+        offsets_unrenumbered,
+    ) = cugraph.uniform_neighbor_sample(
+        G,
+        seeds,
+        hops,
+        with_replacement=False,
+        with_edge_properties=True,
+        with_batch_ids=False,
+        deduplicate_sources=True,
+        renumber=False,
+        return_offsets=True,
+        random_state=62,
+    )
+
+    # Same call with renumbering enabled; a renumber map is returned as well.
+    (
+        sampling_results_renumbered,
+        offsets_renumbered,
+        renumber_map,
+    ) = cugraph.uniform_neighbor_sample(
+        G,
+        seeds,
+        hops,
+        with_replacement=False,
+        with_edge_properties=True,
+        with_batch_ids=False,
+        deduplicate_sources=True,
+        renumber=True,
+        return_offsets=True,
+        random_state=62,
+    )
+
+    sources_hop_0 = sampling_results_unrenumbered[
+        sampling_results_unrenumbered.hop_id == 0
+    ].sources
+    # NOTE(review): indentation was lost in this view; the map construction and
+    # the assert are assumed to belong to the loop body - confirm against the
+    # original file.
+    for hop in range(len(hops)):
+        destinations_hop = sampling_results_unrenumbered[
+            sampling_results_unrenumbered.hop_id <= hop
+        ].destinations
+        expected_renumber_map = cudf.concat([sources_hop_0, destinations_hop]).unique()
+
+        # Order-insensitive comparison against the first entries of the map.
+        assert sorted(expected_renumber_map.values_host.tolist()) == sorted(
+            renumber_map.map[0 : len(expected_renumber_map)].values_host.tolist()
+        )
+
+    # With nulls dropped, the map offsets must be exactly [0, len(renumber_map)].
+    renumber_map_offsets = offsets_renumbered.renumber_map_offsets.dropna()
+    assert len(renumber_map_offsets) == 2
+    assert renumber_map_offsets.iloc[0] == 0
+    assert renumber_map_offsets.iloc[-1] == len(renumber_map)
+
+    assert len(offsets_renumbered) == 2
+
+
+@pytest.mark.sg
+@pytest.mark.parametrize("hops", [[5], [5, 5], [5, 5, 5]])
+@pytest.mark.parametrize("seed", [62, 66, 68])
+def test_uniform_neighbor_sample_csr_csc_global(hops, seed):
+    """Check that CSR output with ``compress_per_hop=False`` maps to real edges.
+
+    Major ids are rebuilt by repeating each row index by the difference of
+    consecutive ``major_offsets``.  Majors and minors are then translated
+    through the renumber map, and every resulting (src, dst) pair must occur
+    exactly once in the email_Eu_core edge list.
+    """
+    edgelist = email_Eu_core.get_edgelist()
+
+    G = cugraph.Graph(directed=True)
+    G.from_cudf_edgelist(edgelist, source="src", destination="dst")
+
+    start_vertices = G.select_random_vertices(seed, int(0.0001 * len(edgelist)))
+
+    sampling_results, offsets, renumber_map = cugraph.uniform_neighbor_sample(
+        G,
+        start_vertices,
+        hops,
+        with_replacement=False,
+        with_edge_properties=True,
+        with_batch_ids=False,
+        deduplicate_sources=True,
+        # carryover not valid because C++ sorts on (hop,src)
+        prior_sources_behavior="exclude",
+        renumber=True,
+        return_offsets=True,
+        random_state=seed,
+        use_legacy_names=False,
+        compress_per_hop=False,
+        compression="CSR",
+        include_hop_column=False,
+    )
+
+    # Expand the offsets back into one major id per sampled edge.
+    major_offsets = sampling_results["major_offsets"].dropna().values
+    row_ids = cudf.Series(cupy.arange(len(major_offsets) - 1))
+    majors = row_ids.repeat(cupy.diff(major_offsets))
+
+    minors = sampling_results["minors"].dropna()
+    assert len(majors) == len(minors)
+
+    original_majors = renumber_map.map.iloc[majors]
+    original_minors = renumber_map.map.iloc[minors]
+
+    for position in range(len(original_majors)):
+        src = original_majors.iloc[position]
+        dst = original_minors.iloc[position]
+        matching_edges = edgelist[(edgelist.src == src) & (edgelist.dst == dst)]
+        assert 1 == len(matching_edges)
+
+
+@pytest.mark.sg
+@pytest.mark.parametrize("seed", [62, 66, 68])
+@pytest.mark.parametrize("hops", [[5], [5, 5], [5, 5, 5]])
+def test_uniform_neighbor_sample_csr_csc_local(hops, seed):
+    """Check that CSR output with ``compress_per_hop=True`` maps to real edges.
+
+    For each hop, the slice of ``major_offsets`` selected by the returned
+    ``offsets`` is expanded into major ids and paired with the matching slice
+    of ``minors``.  After translation through the renumber map, every
+    (src, dst) pair must occur exactly once in the email_Eu_core edge list.
+    """
+    edgelist = email_Eu_core.get_edgelist(download=True)
+
+    G = cugraph.Graph(directed=True)
+    G.from_cudf_edgelist(edgelist, source="src", destination="dst")
+
+    # hardcoded to ensure out-degree is high enough
+    start_vertices = cudf.Series([49, 71], dtype="int32")
+
+    sampling_results, offsets, renumber_map = cugraph.uniform_neighbor_sample(
+        G,
+        start_vertices,
+        hops,
+        with_replacement=False,
+        with_edge_properties=True,
+        with_batch_ids=False,
+        deduplicate_sources=True,
+        prior_sources_behavior="carryover",
+        renumber=True,
+        return_offsets=True,
+        random_state=seed,
+        use_legacy_names=False,
+        compress_per_hop=True,
+        compression="CSR",
+        include_hop_column=False,
+    )
+
+    for hop in range(len(hops)):
+        # The slice end is inclusive of the next hop's first offset entry.
+        hop_start = offsets.offsets.iloc[hop]
+        hop_stop = offsets.offsets.iloc[hop + 1] + 1
+        major_offsets = sampling_results["major_offsets"].iloc[hop_start:hop_stop]
+
+        minors = sampling_results["minors"].iloc[
+            major_offsets.iloc[0] : major_offsets.iloc[-1]
+        ]
+
+        row_ids = cudf.Series(cupy.arange(len(major_offsets) - 1))
+        majors = row_ids.repeat(cupy.diff(major_offsets))
+
+        original_majors = renumber_map.map.iloc[majors]
+        original_minors = renumber_map.map.iloc[minors]
+
+        for position in range(len(original_majors)):
+            src = original_majors.iloc[position]
+            dst = original_minors.iloc[position]
+            matching_edges = edgelist[(edgelist.src == src) & (edgelist.dst == dst)]
+            assert 1 == len(matching_edges)
+
+
+@pytest.mark.sg
+@pytest.mark.skip(reason="needs to be written!")
+def test_uniform_neighbor_sample_dcsr_dcsc_global():
+    """Placeholder test: marked as skipped and raises if it is ever run."""
+    raise NotImplementedError
+
+
+@pytest.mark.sg
+@pytest.mark.skip(reason="needs to be written!")
+def test_uniform_neighbor_sample_dcsr_dcsc_local():
+    """Placeholder test: marked as skipped and raises if it is ever run."""
+    raise NotImplementedError
+
+
@pytest.mark.sg
@pytest.mark.skip(reason="needs to be written!")
def test_multi_client_sampling():
diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py
index 9d87c097287..460a25cbd14 100644
--- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py
+++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py
@@ -17,6 +17,7 @@
import pytest
+import pandas
import cupy
import cudf
import cugraph
@@ -138,7 +139,7 @@ def test_mg_uniform_neighbor_sample_simple(dask_client, input_combo):
dg,
input_combo["start_list"],
input_combo["fanout_vals"],
- input_combo["with_replacement"],
+ with_replacement=input_combo["with_replacement"],
)
# multi edges are dropped to easily verify that each edge in the
@@ -228,7 +229,9 @@ def test_mg_uniform_neighbor_sample_tree(dask_client, directed):
start_list = cudf.Series([0, 0], dtype="int32")
fanout_vals = [4, 1, 3]
with_replacement = True
- result_nbr = uniform_neighbor_sample(G, start_list, fanout_vals, with_replacement)
+ result_nbr = uniform_neighbor_sample(
+ G, start_list, fanout_vals, with_replacement=with_replacement
+ )
result_nbr = result_nbr.drop_duplicates()
@@ -283,7 +286,7 @@ def test_mg_uniform_neighbor_sample_unweighted(dask_client):
with_replacement = True
sampling_results = uniform_neighbor_sample(
- G, start_list, fanout_vals, with_replacement
+ G, start_list, fanout_vals, with_replacement=with_replacement
)
expected_src = [0, 0]
@@ -380,13 +383,17 @@ def test_uniform_neighbor_sample_edge_properties(dask_client, return_offsets):
dfp = sampling_results.get_partition(i).compute()
if len(dfp) > 0:
offsets_p = sampling_offsets.get_partition(i).compute()
+ print(offsets_p)
assert len(offsets_p) > 0
if offsets_p.batch_id.iloc[0] == 1:
batches_found[1] += 1
- assert offsets_p.batch_id.values_host.tolist() == [1]
- assert offsets_p.offsets.values_host.tolist() == [0]
+ assert offsets_p.batch_id.dropna().values_host.tolist() == [1]
+ assert offsets_p.offsets.dropna().values_host.tolist() == [
+ 0,
+ len(dfp),
+ ]
assert sorted(dfp.sources.values_host.tolist()) == (
[1, 1, 3, 3, 4, 4]
@@ -397,8 +404,11 @@ def test_uniform_neighbor_sample_edge_properties(dask_client, return_offsets):
elif offsets_p.batch_id.iloc[0] == 0:
batches_found[0] += 1
- assert offsets_p.batch_id.values_host.tolist() == [0]
- assert offsets_p.offsets.values_host.tolist() == [0]
+ assert offsets_p.batch_id.dropna().values_host.tolist() == [0]
+ assert offsets_p.offsets.dropna().values_host.tolist() == [
+ 0,
+ len(dfp),
+ ]
assert sorted(dfp.sources.values_host.tolist()) == (
[0, 0, 0, 1, 1, 2, 2, 2, 4, 4]
@@ -703,7 +713,6 @@ def test_uniform_neighbor_sample_batched(dask_client, dataset, input_df, max_bat
source="src",
destination="dst",
edge_attr=["wgt", "eid", "etp"],
- legacy_renum_only=True,
)
input_vertices = dask_cudf.concat([df.src, df.dst]).unique().compute()
@@ -960,7 +969,6 @@ def test_uniform_neighbor_sample_deduplicate_sources_email_eu_core(dask_client):
@pytest.mark.mg
@pytest.mark.parametrize("hops", [[5], [5, 5], [5, 5, 5]])
-@pytest.mark.tags("runme")
def test_uniform_neighbor_sample_renumber(dask_client, hops):
# FIXME This test is not very good because there is a lot of
# non-deterministic behavior that still exists despite passing
@@ -1005,6 +1013,224 @@ def test_uniform_neighbor_sample_renumber(dask_client, hops):
)
+@pytest.mark.mg
+@pytest.mark.parametrize("hops", [[5], [5, 5], [5, 5, 5]])
+def test_uniform_neighbor_sample_offset_renumber(dask_client, hops):
+    """Compare renumbered and unrenumbered multi-GPU sampling of the same seeds.
+
+    The email_Eu_core graph is sampled twice with ``random_state=62``, once
+    with ``renumber=False`` and once with ``renumber=True`` (keeping the
+    single batch together).  The vertices seen up to each hop in the
+    unrenumbered result are compared against the leading entries of the
+    renumber map, and ``renumber_map_offsets`` must span the whole map.
+    """
+    el = dask_cudf.from_cudf(email_Eu_core.get_edgelist(), npartitions=4)
+
+    G = cugraph.Graph(directed=True)
+    G.from_dask_cudf_edgelist(el, source="src", destination="dst")
+
+    seeds = G.select_random_vertices(62, int(0.0001 * len(el)))
+
+    # Reference run: original vertex ids.
+    (
+        sampling_results_unrenumbered,
+        offsets_unrenumbered,
+    ) = cugraph.dask.uniform_neighbor_sample(
+        G,
+        seeds,
+        hops,
+        with_replacement=False,
+        with_edge_properties=True,
+        with_batch_ids=False,
+        deduplicate_sources=True,
+        renumber=False,
+        return_offsets=True,
+        random_state=62,
+    )
+    sampling_results_unrenumbered = sampling_results_unrenumbered.compute()
+    offsets_unrenumbered = offsets_unrenumbered.compute()
+
+    # Same seeds with renumbering enabled; a renumber map is returned as well.
+    (
+        sampling_results_renumbered,
+        offsets_renumbered,
+        renumber_map,
+    ) = cugraph.dask.uniform_neighbor_sample(
+        G,
+        seeds,
+        hops,
+        with_replacement=False,
+        with_edge_properties=True,
+        with_batch_ids=False,
+        deduplicate_sources=True,
+        renumber=True,
+        keep_batches_together=True,
+        min_batch_id=0,
+        max_batch_id=0,
+        return_offsets=True,
+        random_state=62,
+    )
+
+    # can't use compute() since empty batches still get a partition
+    n_workers = len(dask_client.scheduler_info()["workers"])
+    for p in range(n_workers):
+        partition = offsets_renumbered.get_partition(p).compute()
+        if not pandas.isna(partition.batch_id.iloc[0]):
+            break
+    else:
+        # Fail loudly instead of silently checking the last (empty) partition.
+        pytest.fail("no partition with a non-null batch_id was found")
+
+    sampling_results_renumbered = sampling_results_renumbered.get_partition(p).compute()
+    # The offsets partition was already computed by the search loop above.
+    offsets_renumbered = partition
+    renumber_map = renumber_map.get_partition(p).compute()
+
+    sources_hop_0 = sampling_results_unrenumbered[
+        sampling_results_unrenumbered.hop_id == 0
+    ].sources
+    # NOTE(review): indentation was lost in this view; the map construction and
+    # the assert are assumed to belong to the loop body - confirm against the
+    # original file.
+    for hop in range(len(hops)):
+        destinations_hop = sampling_results_unrenumbered[
+            sampling_results_unrenumbered.hop_id <= hop
+        ].destinations
+        expected_renumber_map = cudf.concat([sources_hop_0, destinations_hop]).unique()
+
+        # Order-insensitive comparison against the first entries of the map.
+        assert sorted(expected_renumber_map.values_host.tolist()) == sorted(
+            renumber_map.map[0 : len(expected_renumber_map)].values_host.tolist()
+        )
+
+    # With nulls dropped, the map offsets must be exactly [0, len(renumber_map)].
+    renumber_map_offsets = offsets_renumbered.renumber_map_offsets.dropna()
+    assert len(renumber_map_offsets) == 2
+    assert renumber_map_offsets.iloc[0] == 0
+    assert renumber_map_offsets.iloc[-1] == len(renumber_map)
+
+    assert len(offsets_renumbered) == 2
+
+
+@pytest.mark.mg
+@pytest.mark.parametrize("hops", [[5], [5, 5], [5, 5, 5]])
+@pytest.mark.parametrize("seed", [62, 66, 68])
+def test_uniform_neighbor_sample_csr_csc_global(dask_client, hops, seed):
+    """Check that multi-GPU CSR output (``compress_per_hop=False``) maps to real edges.
+
+    The first partition whose offsets carry a non-null ``batch_id`` is used.
+    Major ids are rebuilt by repeating each row index by the difference of
+    consecutive ``major_offsets``.  Majors and minors are translated through
+    the renumber map, and every resulting (src, dst) pair must occur exactly
+    once in the email_Eu_core edge list.
+    """
+    el = dask_cudf.from_cudf(email_Eu_core.get_edgelist(), npartitions=4)
+
+    G = cugraph.Graph(directed=True)
+    G.from_dask_cudf_edgelist(el, source="src", destination="dst")
+
+    seeds = G.select_random_vertices(seed, int(0.0001 * len(el)))
+
+    sampling_results, offsets, renumber_map = cugraph.dask.uniform_neighbor_sample(
+        G,
+        seeds,
+        hops,
+        with_replacement=False,
+        with_edge_properties=True,
+        with_batch_ids=False,
+        deduplicate_sources=True,
+        # carryover not valid because C++ sorts on (hop,src)
+        prior_sources_behavior="exclude",
+        renumber=True,
+        return_offsets=True,
+        random_state=seed,
+        use_legacy_names=False,
+        compress_per_hop=False,
+        compression="CSR",
+        include_hop_column=False,
+        keep_batches_together=True,
+        min_batch_id=0,
+        max_batch_id=0,
+    )
+
+    # can't use compute() since empty batches still get a partition
+    n_workers = len(dask_client.scheduler_info()["workers"])
+    for p in range(n_workers):
+        partition = offsets.get_partition(p).compute()
+        if not pandas.isna(partition.batch_id.iloc[0]):
+            break
+    else:
+        # Fail loudly instead of silently checking the last (empty) partition.
+        pytest.fail("no partition with a non-null batch_id was found")
+
+    sampling_results = sampling_results.get_partition(p).compute()
+    # The offsets partition was already computed by the search loop above.
+    offsets = partition
+    renumber_map = renumber_map.get_partition(p).compute()
+
+    # Expand the offsets back into one major id per sampled edge.
+    major_offsets = sampling_results["major_offsets"].dropna().values
+    majors = cudf.Series(cupy.arange(len(major_offsets) - 1))
+    majors = majors.repeat(cupy.diff(major_offsets))
+
+    minors = sampling_results["minors"].dropna()
+    assert len(majors) == len(minors)
+
+    majors = renumber_map.map.iloc[majors]
+    minors = renumber_map.map.iloc[minors]
+
+    for i in range(len(majors)):
+        assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])])
+
+
+@pytest.mark.mg
+@pytest.mark.parametrize("seed", [62, 66, 68])
+@pytest.mark.parametrize("hops", [[5], [5, 5], [5, 5, 5]])
+def test_uniform_neighbor_sample_csr_csc_local(dask_client, hops, seed):
+    """Check that multi-GPU CSR output (``compress_per_hop=True``) maps to real edges.
+
+    The first partition whose offsets carry a non-null ``batch_id`` is used.
+    For each hop, the slice of ``major_offsets`` selected by ``offsets`` is
+    expanded into major ids and paired with the matching slice of ``minors``.
+    After translation through the renumber map, every (src, dst) pair must
+    occur exactly once in the email_Eu_core edge list.
+    """
+    el = dask_cudf.from_cudf(email_Eu_core.get_edgelist(), npartitions=4)
+
+    G = cugraph.Graph(directed=True)
+    G.from_dask_cudf_edgelist(el, source="src", destination="dst")
+
+    seeds = dask_cudf.from_cudf(
+        cudf.Series([49, 71], dtype="int32"), npartitions=1
+    )  # hardcoded to ensure out-degree is high enough
+
+    sampling_results, offsets, renumber_map = cugraph.dask.uniform_neighbor_sample(
+        G,
+        seeds,
+        hops,
+        with_replacement=False,
+        with_edge_properties=True,
+        with_batch_ids=False,
+        deduplicate_sources=True,
+        prior_sources_behavior="carryover",
+        renumber=True,
+        return_offsets=True,
+        random_state=seed,
+        use_legacy_names=False,
+        compress_per_hop=True,
+        compression="CSR",
+        include_hop_column=False,
+        keep_batches_together=True,
+        min_batch_id=0,
+        max_batch_id=0,
+    )
+
+    # can't use compute() since empty batches still get a partition
+    n_workers = len(dask_client.scheduler_info()["workers"])
+    for p in range(n_workers):
+        partition = offsets.get_partition(p).compute()
+
+        if not pandas.isna(partition.batch_id.iloc[0]):
+            break
+    else:
+        # Fail loudly instead of silently checking the last (empty) partition.
+        pytest.fail("no partition with a non-null batch_id was found")
+
+    sampling_results = sampling_results.get_partition(p).compute()
+    # The offsets partition was already computed by the search loop above.
+    offsets = partition
+    renumber_map = renumber_map.get_partition(p).compute()
+
+    for hop in range(len(hops)):
+        # The slice end is inclusive of the next hop's first offset entry.
+        major_offsets = sampling_results["major_offsets"].iloc[
+            offsets.offsets.iloc[hop] : (offsets.offsets.iloc[hop + 1] + 1)
+        ]
+
+        minors = sampling_results["minors"].iloc[
+            major_offsets.iloc[0] : major_offsets.iloc[-1]
+        ]
+
+        # Expand the offsets back into one major id per sampled edge.
+        majors = cudf.Series(cupy.arange(len(major_offsets) - 1))
+        majors = majors.repeat(cupy.diff(major_offsets))
+
+        majors = renumber_map.map.iloc[majors]
+        minors = renumber_map.map.iloc[minors]
+
+        for i in range(len(majors)):
+            assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])])
+
+
+@pytest.mark.mg
+@pytest.mark.skip(reason="needs to be written!")
+def test_uniform_neighbor_sample_dcsr_dcsc_global():
+    """Placeholder test: marked as skipped and raises if it is ever run."""
+    raise NotImplementedError
+
+
+@pytest.mark.mg
+@pytest.mark.skip(reason="needs to be written!")
+def test_uniform_neighbor_sample_dcsr_dcsc_local():
+    """Placeholder test: marked as skipped and raises if it is ever run."""
+    raise NotImplementedError
+
+
# =============================================================================
# Benchmarks
# =============================================================================
diff --git a/python/cugraph/cugraph/tests/traversal/test_bfs_mg.py b/python/cugraph/cugraph/tests/traversal/test_bfs_mg.py
index 8ffbecea4fc..5eafc231141 100644
--- a/python/cugraph/cugraph/tests/traversal/test_bfs_mg.py
+++ b/python/cugraph/cugraph/tests/traversal/test_bfs_mg.py
@@ -12,6 +12,7 @@
# limitations under the License.
import gc
+import random
import pytest
@@ -61,7 +62,9 @@ def modify_dataset(df):
return cudf.concat([df, temp_df])
meta = ddf._meta
- ddf = ddf.map_partitions(modify_dataset, meta=meta)
+ ddf = ddf.map_partitions(
+ modify_dataset, meta=meta, token="custom-" + str(random.random())
+ )
df = cudf.read_csv(
input_data_path,
diff --git a/python/cugraph/cugraph/utilities/utils.py b/python/cugraph/cugraph/utilities/utils.py
index e68b5dd4880..7a54a0bf2cf 100644
--- a/python/cugraph/cugraph/utilities/utils.py
+++ b/python/cugraph/cugraph/utilities/utils.py
@@ -364,8 +364,8 @@ def is_matrix_type(m):
return is_cp_matrix_type(m) or is_sp_matrix_type(m)
-def is_nx_graph_type(g):
- return g in __nx_graph_types
+def is_nx_graph_type(graph_type):
+    """Return whether ``graph_type`` is contained in ``__nx_graph_types``."""
+    return graph_type in __nx_graph_types
def is_cugraph_graph_type(g):
diff --git a/python/cugraph/pyproject.toml b/python/cugraph/pyproject.toml
index cadf6879e23..1835ac8bb49 100644
--- a/python/cugraph/pyproject.toml
+++ b/python/cugraph/pyproject.toml
@@ -33,8 +33,8 @@ dependencies = [
"cupy-cuda11x>=12.0.0",
"dask-cuda==23.10.*",
"dask-cudf==23.10.*",
- "dask>=2023.7.1",
- "distributed>=2023.7.1",
+ "dask==2023.9.2",
+ "distributed==2023.9.2",
"fsspec[http]>=0.6.0",
"numba>=0.57",
"pylibcugraph==23.10.*",
diff --git a/python/nx-cugraph/.flake8 b/python/nx-cugraph/.flake8
index 3a2e3fb8617..c5874e54f7e 100644
--- a/python/nx-cugraph/.flake8
+++ b/python/nx-cugraph/.flake8
@@ -11,3 +11,4 @@ extend-ignore =
per-file-ignores =
nx_cugraph/tests/*.py:T201,
__init__.py:F401,F403,
+ _nx_cugraph/__init__.py:E501,
diff --git a/python/nx-cugraph/Makefile b/python/nx-cugraph/Makefile
index c9caf147d53..6e1b98ee6e9 100644
--- a/python/nx-cugraph/Makefile
+++ b/python/nx-cugraph/Makefile
@@ -1,7 +1,17 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
SHELL= /bin/bash
+.PHONY: all
+all: plugin-info lint
+
+.PHONY: lint
lint:
git ls-files | xargs pre-commit run --config lint.yaml --files
+
+.PHONY: lint-update
lint-update:
pre-commit autoupdate --config lint.yaml
+
+.PHONY: plugin-info
+plugin-info:
+ python _nx_cugraph/__init__.py
diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py
new file mode 100644
index 00000000000..9b3332106ec
--- /dev/null
+++ b/python/nx-cugraph/_nx_cugraph/__init__.py
@@ -0,0 +1,88 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tell NetworkX about the cugraph backend. This file can update itself:
+
+$ make plugin-info # Recommended method for development
+
+or
+
+$ python _nx_cugraph/__init__.py
+"""
+
+# Static description of the backend; ``get_info`` below reshapes it before
+# returning it.  Keep hand edits outside the BEGIN/END markers.
+# Entries between BEGIN and END are automatically generated
+_info = {
+    "backend_name": "cugraph",
+    "project": "nx-cugraph",
+    "package": "nx_cugraph",
+    "url": "https://github.com/rapidsai/cugraph/tree/branch-23.10/python/nx-cugraph",
+    "short_summary": "GPU-accelerated backend.",
+    # "description": "TODO",
+    "functions": {
+        # BEGIN: functions
+        "betweenness_centrality",
+        "edge_betweenness_centrality",
+        "louvain_communities",
+        # END: functions
+    },
+    "extra_docstrings": {
+        # BEGIN: extra_docstrings
+        "betweenness_centrality": "`weight` parameter is not yet supported.",
+        "edge_betweenness_centrality": "`weight` parameter is not yet supported.",
+        "louvain_communities": "`threshold` and `seed` parameters are currently ignored.",
+        # END: extra_docstrings
+    },
+    "extra_parameters": {
+        # BEGIN: extra_parameters
+        "louvain_communities": {
+            "max_level : int, optional": "Upper limit of the number of macro-iterations.",
+        },
+        # END: extra_parameters
+    },
+}
+
+
+def get_info():
+    """Target of ``networkx.plugin_info`` entry point.
+
+    This tells NetworkX about the cugraph backend without importing nx_cugraph.
+
+    Returns a shallow copy of ``_info`` in which ``"functions"`` maps each
+    function name to a dict of its per-function extras, and the top-level
+    ``"extra_docstrings"`` / ``"extra_parameters"`` tables are removed.
+    """
+    # Convert to e.g. `{"functions": {"myfunc": {"extra_docstring": ...}}}`
+    info = dict(_info)
+    # (top-level table name, key used inside each function's entry)
+    renamed_tables = (
+        ("extra_docstrings", "extra_docstring"),
+        ("extra_parameters", "extra_parameters"),
+    )
+    per_function = {}
+    for func in info["functions"]:
+        details = {}
+        for table_name, entry_key in renamed_tables:
+            table = info[table_name]
+            if func in table:
+                details[entry_key] = table[func]
+        per_function[func] = details
+    info["functions"] = per_function
+    for table_name, _ in renamed_tables:
+        del info[table_name]
+    return info
+
+
+__version__ = "23.10.00"
+
+if __name__ == "__main__":
+    # Overwrite this file with the text returned by ``_nx_cugraph.core.main``.
+    from pathlib import Path
+
+    from _nx_cugraph.core import main
+
+    this_file = Path(__file__)
+    # ``main`` runs (and reads the file) before the file is opened for writing.
+    this_file.write_text(main(this_file))
diff --git a/python/nx-cugraph/_nx_cugraph/core.py b/python/nx-cugraph/_nx_cugraph/core.py
new file mode 100644
index 00000000000..72f9203897e
--- /dev/null
+++ b/python/nx-cugraph/_nx_cugraph/core.py
@@ -0,0 +1,90 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Utilities to help keep _nx_cugraph up to date."""
+
+
+def get_functions():
+    """Return the ``networkx_algorithm`` attributes of ``BackendInterface``.
+
+    The result maps attribute name to attribute value.  The nx_cugraph
+    imports are done inside the function.
+    """
+    from nx_cugraph.interface import BackendInterface
+    from nx_cugraph.utils import networkx_algorithm
+
+    algorithms = {}
+    for name, attr in vars(BackendInterface).items():
+        if isinstance(attr, networkx_algorithm):
+            algorithms[name] = attr
+    return algorithms
+
+
+def get_extra_docstrings(functions=None):
+    """Map each function name to its truthy ``extra_doc`` attribute.
+
+    ``functions`` defaults to the result of ``get_functions()``; entries whose
+    ``extra_doc`` is falsy are left out.
+    """
+    if functions is None:
+        functions = get_functions()
+    docs = {}
+    for name, func in functions.items():
+        if func.extra_doc:
+            docs[name] = func.extra_doc
+    return docs
+
+
+def get_extra_parameters(functions=None):
+    """Map each function name to its truthy ``extra_params`` attribute.
+
+    ``functions`` defaults to the result of ``get_functions()``; entries whose
+    ``extra_params`` is falsy are left out.
+    """
+    if functions is None:
+        functions = get_functions()
+    params = {}
+    for name, func in functions.items():
+        if func.extra_params:
+            params[name] = func.extra_params
+    return params
+
+
+def update_text(text, lines_to_add, target, indent=" " * 8):
+    """Replace the text between the BEGIN and END markers of ``target``.
+
+    Parameters
+    ----------
+    text : str
+        Text containing ``# BEGIN: {target}`` and ``# END: {target}`` marker
+        lines, each terminated by a newline.
+    lines_to_add : iterable of str
+        Lines (without indentation or trailing newline) to place between the
+        markers.  May be empty, in which case the markers become adjacent.
+    target : str
+        Name used in the two markers.
+    indent : str, optional
+        Prefix put in front of every added line and of the END marker.
+
+    Returns
+    -------
+    str
+        ``text`` with the region between the markers replaced.
+
+    Raises
+    ------
+    ValueError
+        If the BEGIN marker is missing, or no END marker follows it.
+    """
+    begin = f"# BEGIN: {target}\n"
+    end = f"# END: {target}\n"
+    start = text.index(begin)
+    # Only accept an END marker that comes after the BEGIN marker.
+    stop = text.index(end, start + len(begin))
+    # Terminate every added line individually so that an empty input does not
+    # leave a stray blank line between the markers.
+    body = "".join(f"{indent}{line}\n" for line in lines_to_add)
+    return f"{text[:start]}{begin}{body}{indent}{text[stop:]}"
+
+
+def dict_to_lines(d, *, indent=""):
+    """Yield source lines for the items of ``d``, ordered by key.
+
+    A value without a newline becomes one ``key: value,`` line.  A value with
+    newlines becomes a parenthesized group with one ``repr`` per piece, where
+    every piece except the last keeps its trailing newline.
+    """
+    # NOTE(review): the leading whitespace inside the two nested-line literals
+    # below may have been collapsed in this view - confirm against the
+    # original file.
+    for key in sorted(d):
+        value = d[key]
+        if "\n" in value:
+            yield f"{indent}{key!r}: ("
+            *head, tail = value.split("\n")
+            for piece in head:
+                with_newline = piece + "\n"
+                yield f" {indent}{with_newline!r}"
+            yield f" {indent}{tail!r}"
+            yield f"{indent}),"
+        else:
+            yield f"{indent}{key!r}: {value!r},"
+
+
+def main(filepath):
+    """Return the contents of ``filepath`` with its generated sections refreshed.
+
+    The ``functions``, ``extra_docstrings`` and ``extra_parameters`` regions
+    are rebuilt, in that order, from ``get_functions()``.  The file itself is
+    only read here; writing the result back is left to the caller.
+    """
+    from pathlib import Path
+
+    text = Path(filepath).read_text()
+
+    functions = get_functions()
+
+    # Update functions
+    function_lines = [f'"{name}",' for name in sorted(functions)]
+    text = update_text(text, function_lines, "functions")
+
+    # Update extra_docstrings
+    docstring_lines = list(dict_to_lines(get_extra_docstrings(functions)))
+    text = update_text(text, docstring_lines, "extra_docstrings")
+
+    # Update extra_parameters
+    extra_parameters = get_extra_parameters(functions)
+    parameter_lines = []
+    for name in sorted(extra_parameters):
+        parameter_lines.append(f"{name!r}: {{")
+        parameter_lines.extend(dict_to_lines(extra_parameters[name], indent=" " * 4))
+        parameter_lines.append("},")
+    return update_text(text, parameter_lines, "extra_parameters")
diff --git a/python/nx-cugraph/lint.yaml b/python/nx-cugraph/lint.yaml
index dba061bd6b5..6a462a6af79 100644
--- a/python/nx-cugraph/lint.yaml
+++ b/python/nx-cugraph/lint.yaml
@@ -31,7 +31,7 @@ repos:
- id: validate-pyproject
name: Validate pyproject.toml
- repo: https://github.com/PyCQA/autoflake
- rev: v2.2.0
+ rev: v2.2.1
hooks:
- id: autoflake
args: [--in-place]
@@ -40,17 +40,17 @@ repos:
hooks:
- id: isort
- repo: https://github.com/asottile/pyupgrade
- rev: v3.10.1
+ rev: v3.13.0
hooks:
- id: pyupgrade
args: [--py39-plus]
- repo: https://github.com/psf/black
- rev: 23.7.0
+ rev: 23.9.1
hooks:
- id: black
# - id: black-jupyter
- repo: https://github.com/astral-sh/ruff-pre-commit
- rev: v0.0.286
+ rev: v0.0.291
hooks:
- id: ruff
args: [--fix-only, --show-fixes]
@@ -58,11 +58,12 @@ repos:
rev: 6.1.0
hooks:
- id: flake8
+ args: ['--per-file-ignores=_nx_cugraph/__init__.py:E501'] # Why is this necessary?
additional_dependencies: &flake8_dependencies
- # These versions need updated manually
- - flake8==6.1.0
- - flake8-bugbear==23.7.10
- - flake8-simplify==0.20.0
+ # These versions need to be updated manually
+ - flake8==6.1.0
+ - flake8-bugbear==23.9.16
+ - flake8-simplify==0.20.0
- repo: https://github.com/asottile/yesqa
rev: v1.5.0
hooks:
@@ -76,7 +77,7 @@ repos:
additional_dependencies: [tomli]
files: ^(nx_cugraph|docs)/
- repo: https://github.com/astral-sh/ruff-pre-commit
- rev: v0.0.286
+ rev: v0.0.291
hooks:
- id: ruff
- repo: https://github.com/pre-commit/pre-commit-hooks
diff --git a/python/nx-cugraph/nx_cugraph/__init__.py b/python/nx-cugraph/nx_cugraph/__init__.py
index 28066fe2b02..4a0e95a109f 100644
--- a/python/nx-cugraph/nx_cugraph/__init__.py
+++ b/python/nx-cugraph/nx_cugraph/__init__.py
@@ -12,9 +12,21 @@
# limitations under the License.
from networkx.exception import *
-from . import algorithms, classes, convert, utils
-from .algorithms import *
+from . import utils
+
+from . import classes
from .classes import *
+
+from . import convert
from .convert import *
+# from . import convert_matrix
+# from .convert_matrix import *
+
+# from . import generators
+# from .generators import *
+
+from . import algorithms
+from .algorithms import *
+
__version__ = "23.10.00"
diff --git a/python/nx-cugraph/nx_cugraph/algorithms/centrality/betweenness.py b/python/nx-cugraph/nx_cugraph/algorithms/centrality/betweenness.py
index b777919f86f..104ac87414c 100644
--- a/python/nx-cugraph/nx_cugraph/algorithms/centrality/betweenness.py
+++ b/python/nx-cugraph/nx_cugraph/algorithms/centrality/betweenness.py
@@ -13,7 +13,7 @@
import pylibcugraph as plc
from nx_cugraph.convert import _to_graph
-from nx_cugraph.utils import _handle_seed, networkx_algorithm
+from nx_cugraph.utils import _seed_to_int, networkx_algorithm
__all__ = ["betweenness_centrality", "edge_betweenness_centrality"]
@@ -22,11 +22,12 @@
def betweenness_centrality(
G, k=None, normalized=True, weight=None, endpoints=False, seed=None
):
+ """`weight` parameter is not yet supported."""
if weight is not None:
raise NotImplementedError(
"Weighted implementation of betweenness centrality not currently supported"
)
- seed = _handle_seed(seed)
+ seed = _seed_to_int(seed)
G = _to_graph(G, weight)
node_ids, values = plc.betweenness_centrality(
resource_handle=plc.ResourceHandle(),
@@ -47,6 +48,7 @@ def _(G, k=None, normalized=True, weight=None, endpoints=False, seed=None):
@networkx_algorithm
def edge_betweenness_centrality(G, k=None, normalized=True, weight=None, seed=None):
+ """`weight` parameter is not yet supported."""
if weight is not None:
raise NotImplementedError(
"Weighted implementation of betweenness centrality not currently supported"
diff --git a/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py
index ca5f05c2014..a183b59fe1d 100644
--- a/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py
+++ b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py
@@ -17,7 +17,7 @@
from nx_cugraph.convert import _to_undirected_graph
from nx_cugraph.utils import (
_groupby,
- _handle_seed,
+ _seed_to_int,
networkx_algorithm,
not_implemented_for,
)
@@ -26,16 +26,17 @@
@not_implemented_for("directed")
-@networkx_algorithm(extra_params="max_level")
+@networkx_algorithm(
+ extra_params={
+ "max_level : int, optional": "Upper limit of the number of macro-iterations."
+ }
+)
def louvain_communities(
G, weight="weight", resolution=1, threshold=0.0000001, seed=None, *, max_level=None
):
- """`threshold` and `seed` parameters are currently ignored.
-
- Extra parameter: `max_level` controls the maximum number of levels of the algorithm.
- """
+ """`threshold` and `seed` parameters are currently ignored."""
# NetworkX allows both directed and undirected, but cugraph only allows undirected.
- seed = _handle_seed(seed) # Unused, but ensure it's valid for future compatibility
+ seed = _seed_to_int(seed) # Unused, but ensure it's valid for future compatibility
G = _to_undirected_graph(G, weight)
if G.row_indices.size == 0:
# TODO: PLC doesn't handle empty graphs gracefully!
@@ -46,8 +47,8 @@ def louvain_communities(
resource_handle=plc.ResourceHandle(),
graph=G._get_plc_graph(),
max_level=max_level, # TODO: add this parameter to NetworkX
+ threshold=threshold,
resolution=resolution,
- # threshold=threshold, # TODO: add this parameter to PLC
do_expensive_check=False,
)
groups = _groupby(clusters, vertices)
diff --git a/python/nx-cugraph/nx_cugraph/interface.py b/python/nx-cugraph/nx_cugraph/interface.py
index cc750cd2d5b..2ad23acd940 100644
--- a/python/nx-cugraph/nx_cugraph/interface.py
+++ b/python/nx-cugraph/nx_cugraph/interface.py
@@ -62,9 +62,7 @@ def key(testpath):
# Reasons for xfailing
no_weights = "weighted implementation not currently supported"
no_multigraph = "multigraphs not currently supported"
- louvain_different = (
- "Louvain may be different due to RNG or unsupported threshold parameter"
- )
+ louvain_different = "Louvain may be different due to RNG"
xfail = {}
@@ -176,7 +174,6 @@ def key(testpath):
): louvain_different,
key("test_louvain.py:test_none_weight_param"): louvain_different,
key("test_louvain.py:test_multigraph"): louvain_different,
- key("test_louvain.py:test_threshold"): louvain_different,
}
)
diff --git a/python/nx-cugraph/nx_cugraph/tests/test_match_api.py b/python/nx-cugraph/nx_cugraph/tests/test_match_api.py
index 64d3704dd65..ecfda1397db 100644
--- a/python/nx-cugraph/nx_cugraph/tests/test_match_api.py
+++ b/python/nx-cugraph/nx_cugraph/tests/test_match_api.py
@@ -45,11 +45,14 @@ def test_match_signature_and_names():
assert orig_sig == func_sig
else:
# Ignore extra parameters added to nx-cugraph algorithm
+ # The key of func.extra_params may be like "max_level : int, optional",
+ # but we only want "max_level" here.
+ extra_params = {name.split(" ")[0] for name in func.extra_params}
assert orig_sig == func_sig.replace(
parameters=[
p
for name, p in func_sig.parameters.items()
- if name not in func.extra_params
+ if name not in extra_params
]
)
if func.can_run is not nxcg.utils.decorators._default_can_run:
diff --git a/python/nx-cugraph/nx_cugraph/utils/decorators.py b/python/nx-cugraph/nx_cugraph/utils/decorators.py
index 3dbdb07e87f..0f15d236ecd 100644
--- a/python/nx-cugraph/nx_cugraph/utils/decorators.py
+++ b/python/nx-cugraph/nx_cugraph/utils/decorators.py
@@ -10,13 +10,21 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+from __future__ import annotations
+
from functools import partial, update_wrapper
-from networkx.utils.decorators import not_implemented_for
+from networkx.utils.decorators import nodes_or_number, not_implemented_for
from nx_cugraph.interface import BackendInterface
-__all__ = ["not_implemented_for", "networkx_algorithm"]
+try:
+ from networkx.utils.backends import _registered_algorithms
+except ModuleNotFoundError:
+ from networkx.classes.backends import _registered_algorithms
+
+
+__all__ = ["not_implemented_for", "nodes_or_number", "networkx_algorithm"]
def networkx_class(api):
@@ -28,7 +36,17 @@ def inner(func):
class networkx_algorithm:
- def __new__(cls, func=None, *, name=None, extra_params=None):
+ name: str
+ extra_doc: str | None
+ extra_params: dict[str, str] | None
+
+ def __new__(
+ cls,
+ func=None,
+ *,
+ name: str | None = None,
+ extra_params: dict[str, str] | str | None = None,
+ ):
if func is None:
return partial(networkx_algorithm, name=name, extra_params=extra_params)
instance = object.__new__(cls)
@@ -37,13 +55,20 @@ def __new__(cls, func=None, *, name=None, extra_params=None):
instance.__defaults__ = func.__defaults__
instance.__kwdefaults__ = func.__kwdefaults__
instance.name = func.__name__ if name is None else name
- # TODO: should extra_params be a dict[str, str] that describes the parameters?
if extra_params is None:
- instance.extra_params = None
+ pass
elif isinstance(extra_params, str):
- instance.extra_params = {extra_params}
- else:
- instance.extra_params = set(extra_params)
+ extra_params = {extra_params: ""}
+ elif not isinstance(extra_params, dict):
+ raise TypeError(
+ f"extra_params must be dict, str, or None; got {type(extra_params)}"
+ )
+ instance.extra_params = extra_params
+ # The docstring on our function is added to the NetworkX docstring.
+ instance.extra_doc = func.__doc__
+ # Copy __doc__ from NetworkX
+ if instance.name in _registered_algorithms:
+ instance.__doc__ = _registered_algorithms[instance.name].__doc__
instance.can_run = _default_can_run
setattr(BackendInterface, instance.name, instance)
# Set methods so they are in __dict__
diff --git a/python/nx-cugraph/nx_cugraph/utils/misc.py b/python/nx-cugraph/nx_cugraph/utils/misc.py
index 64c0be066f2..72e4094b8b7 100644
--- a/python/nx-cugraph/nx_cugraph/utils/misc.py
+++ b/python/nx-cugraph/nx_cugraph/utils/misc.py
@@ -18,7 +18,7 @@
import cupy as cp
-__all__ = ["_groupby", "_handle_seed"]
+__all__ = ["_groupby", "_seed_to_int"]
def _groupby(groups: cp.ndarray, values: cp.ndarray) -> dict[int, cp.ndarray]:
@@ -51,8 +51,8 @@ def _groupby(groups: cp.ndarray, values: cp.ndarray) -> dict[int, cp.ndarray]:
return rv
-def _handle_seed(seed: int | Random | None) -> int:
- """Handle seed argument and ensure it is what pylibcugraph needs: an int."""
+def _seed_to_int(seed: int | Random | None) -> int:
+ """Handle any valid seed argument and convert it to an int if necessary."""
if seed is None:
return
if isinstance(seed, Random):
diff --git a/python/nx-cugraph/pyproject.toml b/python/nx-cugraph/pyproject.toml
index 95e9c256e5d..db3b3a22545 100644
--- a/python/nx-cugraph/pyproject.toml
+++ b/python/nx-cugraph/pyproject.toml
@@ -54,6 +54,9 @@ Documentation = "https://docs.rapids.ai/api/cugraph/stable/"
[project.entry-points."networkx.plugins"]
cugraph = "nx_cugraph.interface:BackendInterface"
+[project.entry-points."networkx.plugin_info"]
+cugraph = "_nx_cugraph:get_info"
+
[tool.setuptools]
license-files = ["LICENSE"]
@@ -61,6 +64,8 @@ license-files = ["LICENSE"]
include = [
"nx_cugraph*",
"nx_cugraph.*",
+ "_nx_cugraph*",
+ "_nx_cugraph.*",
]
[tool.black]
@@ -75,6 +80,7 @@ float_to_top = true
default_section = "THIRDPARTY"
known_first_party = "nx_cugraph"
line_length = 88
+extend_skip_glob = ["nx_cugraph/__init__.py"]
[tool.pytest.ini_options]
minversion = "6.0"
@@ -128,6 +134,9 @@ exclude_lines = [
# https://github.com/charliermarsh/ruff/
line-length = 88
target-version = "py39"
+unfixable = [
+ "F841", # unused-variable (Note: can leave useless expression)
+]
select = [
"ALL",
]
@@ -203,6 +212,7 @@ ignore = [
"__init__.py" = ["F401"] # Allow unused imports (w/o defining `__all__`)
# Allow assert, print, RNG, and no docstring
"nx_cugraph/**/tests/*py" = ["S101", "S311", "T201", "D103", "D100"]
+"_nx_cugraph/__init__.py" = ["E501"]
[tool.ruff.flake8-annotations]
mypy-init-return = true
diff --git a/python/pylibcugraph/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/pylibcugraph/CMakeLists.txt
index 5550d5287bc..c2e22fc1ff7 100644
--- a/python/pylibcugraph/pylibcugraph/CMakeLists.txt
+++ b/python/pylibcugraph/pylibcugraph/CMakeLists.txt
@@ -35,6 +35,7 @@ set(cython_sources
hits.pyx
induced_subgraph.pyx
k_core.pyx
+ k_truss_subgraph.pyx
jaccard_coefficients.pyx
sorensen_coefficients.pyx
overlap_coefficients.pyx
diff --git a/python/pylibcugraph/pylibcugraph/__init__.py b/python/pylibcugraph/pylibcugraph/__init__.py
index 194d8261064..98ec86cbad4 100644
--- a/python/pylibcugraph/pylibcugraph/__init__.py
+++ b/python/pylibcugraph/pylibcugraph/__init__.py
@@ -89,6 +89,8 @@
from pylibcugraph.replicate_edgelist import replicate_edgelist
+from pylibcugraph.k_truss_subgraph import k_truss_subgraph
+
from pylibcugraph.jaccard_coefficients import jaccard_coefficients
from pylibcugraph.overlap_coefficients import overlap_coefficients
diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd
index ffb458b409c..29c6d79e08d 100644
--- a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd
+++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd
@@ -176,15 +176,32 @@ cdef extern from "cugraph_c/algorithms.h":
const cugraph_sample_result_t* result
)
+ # Deprecated, use cugraph_sample_result_get_majors
cdef cugraph_type_erased_device_array_view_t* \
cugraph_sample_result_get_sources(
const cugraph_sample_result_t* result
)
+ # Deprecated, use cugraph_sample_result_get_minors
cdef cugraph_type_erased_device_array_view_t* \
cugraph_sample_result_get_destinations(
const cugraph_sample_result_t* result
)
+
+ cdef cugraph_type_erased_device_array_view_t* \
+ cugraph_sample_result_get_majors(
+ const cugraph_sample_result_t* result
+ )
+
+ cdef cugraph_type_erased_device_array_view_t* \
+ cugraph_sample_result_get_minors(
+ const cugraph_sample_result_t* result
+ )
+
+ cdef cugraph_type_erased_device_array_view_t* \
+ cugraph_sample_result_get_major_offsets(
+ const cugraph_sample_result_t* result
+ )
cdef cugraph_type_erased_device_array_view_t* \
cugraph_sample_result_get_index(
@@ -211,11 +228,17 @@ cdef extern from "cugraph_c/algorithms.h":
const cugraph_sample_result_t* result
)
+ cdef cugraph_type_erased_device_array_view_t* \
+ cugraph_sample_result_get_label_hop_offsets(
+ const cugraph_sample_result_t* result
+ )
+
cdef cugraph_type_erased_device_array_view_t* \
cugraph_sample_result_get_start_labels(
const cugraph_sample_result_t* result
)
+ # Deprecated
cdef cugraph_type_erased_device_array_view_t* \
cugraph_sample_result_get_offsets(
const cugraph_sample_result_t* result
@@ -246,10 +269,17 @@ cdef extern from "cugraph_c/algorithms.h":
pass
ctypedef enum cugraph_prior_sources_behavior_t:
- DEFAULT
+ DEFAULT=0
CARRY_OVER
EXCLUDE
+ ctypedef enum cugraph_compression_type_t:
+ COO=0
+ CSR
+ CSC
+ DCSR
+ DCSC
+
cdef cugraph_error_code_t \
cugraph_sampling_options_create(
cugraph_sampling_options_t** options,
@@ -277,7 +307,7 @@ cdef extern from "cugraph_c/algorithms.h":
cdef void \
cugraph_sampling_set_prior_sources_behavior(
cugraph_sampling_options_t* options,
- cugraph_prior_sources_behavior_t value
+ cugraph_prior_sources_behavior_t value,
)
cdef void \
@@ -286,10 +316,22 @@ cdef extern from "cugraph_c/algorithms.h":
bool_t value,
)
+ cdef void \
+ cugraph_sampling_set_compress_per_hop(
+ cugraph_sampling_options_t* options,
+ bool_t value,
+ )
+
+ cdef void \
+ cugraph_sampling_set_compression_type(
+ cugraph_sampling_options_t* options,
+ cugraph_compression_type_t value,
+ )
+
cdef void \
cugraph_sampling_options_free(
cugraph_sampling_options_t* options,
- )
+ )
# uniform random walks
cdef cugraph_error_code_t \
diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd
index 64944e8773f..3c273b7d3fa 100644
--- a/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd
+++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd
@@ -253,3 +253,13 @@ cdef extern from "cugraph_c/community_algorithms.h":
cugraph_error_t** error
)
+ ###########################################################################
+ # K truss
+ cdef cugraph_error_code_t \
+ cugraph_k_truss_subgraph(
+ const cugraph_resource_handle_t* handle,
+ cugraph_graph_t* graph,
+ size_t k,
+ bool_t do_expensive_check,
+ cugraph_induced_subgraph_result_t** result,
+ cugraph_error_t** error)
diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd
index 91cc11d6b1c..c32b57f8621 100644
--- a/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd
+++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd
@@ -43,23 +43,6 @@ from pylibcugraph._cugraph_c.array cimport (
cdef extern from "cugraph_c/sampling_algorithms.h":
###########################################################################
- # deprecated, should migrate to cugraph_uniform_neighbor_sample
- cdef cugraph_error_code_t cugraph_uniform_neighbor_sample_with_edge_properties(
- const cugraph_resource_handle_t* handle,
- cugraph_graph_t* graph,
- const cugraph_type_erased_device_array_view_t* start_vertices,
- const cugraph_type_erased_device_array_view_t* start_vertex_labels,
- const cugraph_type_erased_device_array_view_t* label_list,
- const cugraph_type_erased_device_array_view_t* label_to_comm_rank,
- const cugraph_type_erased_host_array_view_t* fan_out,
- cugraph_rng_state_t* rng_state,
- bool_t with_replacement,
- bool_t return_hops,
- bool_t do_expensive_check,
- cugraph_sample_result_t** result,
- cugraph_error_t** error
- )
-
cdef cugraph_error_code_t cugraph_uniform_neighbor_sample(
const cugraph_resource_handle_t* handle,
cugraph_graph_t* graph,
diff --git a/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx b/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx
index d11f6994298..9f98b4f37b0 100644
--- a/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx
+++ b/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx
@@ -20,14 +20,18 @@ from pylibcugraph._cugraph_c.array cimport (
)
from pylibcugraph._cugraph_c.algorithms cimport (
cugraph_sample_result_t,
- cugraph_sample_result_get_sources,
- cugraph_sample_result_get_destinations,
+ cugraph_sample_result_get_major_offsets,
+ cugraph_sample_result_get_majors,
+ cugraph_sample_result_get_minors,
+ cugraph_sample_result_get_label_hop_offsets,
+ cugraph_sample_result_get_sources, # deprecated
+ cugraph_sample_result_get_destinations, # deprecated
cugraph_sample_result_get_edge_weight,
cugraph_sample_result_get_edge_id,
cugraph_sample_result_get_edge_type,
- cugraph_sample_result_get_hop,
+ cugraph_sample_result_get_hop, # deprecated
cugraph_sample_result_get_start_labels,
- cugraph_sample_result_get_offsets,
+ cugraph_sample_result_get_offsets, # deprecated
cugraph_sample_result_get_renumber_map,
cugraph_sample_result_get_renumber_map_offsets,
cugraph_sample_result_free,
@@ -60,23 +64,71 @@ cdef class SamplingResult:
cdef set_ptr(self, cugraph_sample_result_t* sample_result_ptr):
self.c_sample_result_ptr = sample_result_ptr
+ def get_major_offsets(self):
+ # Return the array obtained from cugraph_sample_result_get_major_offsets,
+ # wrapped by create_cupy_array_view_for_device_ptr, or None when the C
+ # getter returns a NULL view.
+ # Raises ValueError if no non-NULL result pointer has been set.
+ if self.c_sample_result_ptr is NULL:
+ raise ValueError("pointer not set, must call set_ptr() with a "
+ "non-NULL value first.")
+
+ cdef cugraph_type_erased_device_array_view_t* device_array_view_ptr = (
+ cugraph_sample_result_get_major_offsets(self.c_sample_result_ptr)
+ )
+ if device_array_view_ptr is NULL:
+ return None
+
+ # `self` is handed to the helper together with the view; presumably it
+ # serves as the owner keeping the device memory alive -- TODO confirm.
+ return create_cupy_array_view_for_device_ptr(device_array_view_ptr,
+ self)
+
+ def get_majors(self):
+ # Return the array obtained from cugraph_sample_result_get_majors,
+ # wrapped by create_cupy_array_view_for_device_ptr, or None when the C
+ # getter returns a NULL view.
+ # Raises ValueError if no non-NULL result pointer has been set.
+ if self.c_sample_result_ptr is NULL:
+ raise ValueError("pointer not set, must call set_ptr() with a "
+ "non-NULL value first.")
+ cdef cugraph_type_erased_device_array_view_t* device_array_view_ptr = (
+ cugraph_sample_result_get_majors(self.c_sample_result_ptr)
+ )
+ if device_array_view_ptr is NULL:
+ return None
+
+ # `self` is handed to the helper together with the view; presumably it
+ # serves as the owner keeping the device memory alive -- TODO confirm.
+ return create_cupy_array_view_for_device_ptr(device_array_view_ptr,
+ self)
+
+ def get_minors(self):
+ # Return the array obtained from cugraph_sample_result_get_minors,
+ # wrapped by create_cupy_array_view_for_device_ptr, or None when the C
+ # getter returns a NULL view.
+ # Raises ValueError if no non-NULL result pointer has been set.
+ if self.c_sample_result_ptr is NULL:
+ raise ValueError("pointer not set, must call set_ptr() with a "
+ "non-NULL value first.")
+ cdef cugraph_type_erased_device_array_view_t* device_array_view_ptr = (
+ cugraph_sample_result_get_minors(self.c_sample_result_ptr)
+ )
+ if device_array_view_ptr is NULL:
+ return None
+
+ # `self` is handed to the helper together with the view; presumably it
+ # serves as the owner keeping the device memory alive -- TODO confirm.
+ return create_cupy_array_view_for_device_ptr(device_array_view_ptr,
+ self)
+
def get_sources(self):
+ # Deprecated
+ # (the C getter used here is marked deprecated in favor of
+ # cugraph_sample_result_get_majors).
+ # Returns None when the C getter returns a NULL view; raises ValueError
+ # if no non-NULL result pointer has been set.
if self.c_sample_result_ptr is NULL:
raise ValueError("pointer not set, must call set_ptr() with a "
"non-NULL value first.")
cdef cugraph_type_erased_device_array_view_t* device_array_view_ptr = (
cugraph_sample_result_get_sources(self.c_sample_result_ptr)
)
+ if device_array_view_ptr is NULL:
+ return None
+
return create_cupy_array_view_for_device_ptr(device_array_view_ptr,
self)
def get_destinations(self):
+ # Deprecated
+ # (the C getter used here is marked deprecated in favor of
+ # cugraph_sample_result_get_minors).
+ # Returns None when the C getter returns a NULL view; raises ValueError
+ # if no non-NULL result pointer has been set.
if self.c_sample_result_ptr is NULL:
raise ValueError("pointer not set, must call set_ptr() with a "
"non-NULL value first.")
cdef cugraph_type_erased_device_array_view_t* device_array_view_ptr = (
cugraph_sample_result_get_destinations(self.c_sample_result_ptr)
)
+ if device_array_view_ptr is NULL:
+ return None
+
return create_cupy_array_view_for_device_ptr(device_array_view_ptr,
self)
@@ -95,6 +147,7 @@ cdef class SamplingResult:
self)
def get_indices(self):
+ # Deprecated
+ # Kept as an alias: it simply forwards to get_edge_weights().
return self.get_edge_weights()
def get_edge_ids(self):
@@ -132,9 +185,26 @@ cdef class SamplingResult:
cdef cugraph_type_erased_device_array_view_t* device_array_view_ptr = (
cugraph_sample_result_get_start_labels(self.c_sample_result_ptr)
)
+ if device_array_view_ptr is NULL:
+ return None
+
return create_cupy_array_view_for_device_ptr(device_array_view_ptr,
self)
+ def get_label_hop_offsets(self):
+ # Return the array obtained from
+ # cugraph_sample_result_get_label_hop_offsets, wrapped by
+ # create_cupy_array_view_for_device_ptr, or None when the C getter
+ # returns a NULL view.
+ # Raises ValueError if no non-NULL result pointer has been set.
+ if self.c_sample_result_ptr is NULL:
+ raise ValueError("pointer not set, must call set_ptr() with a "
+ "non-NULL value first.")
+ cdef cugraph_type_erased_device_array_view_t* device_array_view_ptr = (
+ cugraph_sample_result_get_label_hop_offsets(self.c_sample_result_ptr)
+ )
+ if device_array_view_ptr is NULL:
+ return None
+
+ # `self` is handed to the helper together with the view; presumably it
+ # serves as the owner keeping the device memory alive -- TODO confirm.
+ return create_cupy_array_view_for_device_ptr(device_array_view_ptr,
+ self)
+
+ # Deprecated
def get_offsets(self):
if self.c_sample_result_ptr is NULL:
raise ValueError("pointer not set, must call set_ptr() with a "
@@ -142,9 +212,13 @@ cdef class SamplingResult:
cdef cugraph_type_erased_device_array_view_t* device_array_view_ptr = (
cugraph_sample_result_get_offsets(self.c_sample_result_ptr)
)
+ if device_array_view_ptr is NULL:
+ return None
+
return create_cupy_array_view_for_device_ptr(device_array_view_ptr,
self)
+ # Deprecated
def get_hop_ids(self):
if self.c_sample_result_ptr is NULL:
raise ValueError("pointer not set, must call set_ptr() with a "
@@ -152,6 +226,9 @@ cdef class SamplingResult:
cdef cugraph_type_erased_device_array_view_t* device_array_view_ptr = (
cugraph_sample_result_get_hop(self.c_sample_result_ptr)
)
+ if device_array_view_ptr is NULL:
+ return None
+
return create_cupy_array_view_for_device_ptr(device_array_view_ptr,
self)
@@ -162,6 +239,9 @@ cdef class SamplingResult:
cdef cugraph_type_erased_device_array_view_t* device_array_view_ptr = (
cugraph_sample_result_get_renumber_map(self.c_sample_result_ptr)
)
+ if device_array_view_ptr is NULL:
+ return None
+
return create_cupy_array_view_for_device_ptr(device_array_view_ptr,
self)
@@ -172,5 +252,8 @@ cdef class SamplingResult:
cdef cugraph_type_erased_device_array_view_t* device_array_view_ptr = (
cugraph_sample_result_get_renumber_map_offsets(self.c_sample_result_ptr)
)
+ if device_array_view_ptr is NULL:
+ return None
+
return create_cupy_array_view_for_device_ptr(device_array_view_ptr,
self)
\ No newline at end of file
diff --git a/python/pylibcugraph/pylibcugraph/k_truss_subgraph.pyx b/python/pylibcugraph/pylibcugraph/k_truss_subgraph.pyx
new file mode 100644
index 00000000000..cc91e76dd55
--- /dev/null
+++ b/python/pylibcugraph/pylibcugraph/k_truss_subgraph.pyx
@@ -0,0 +1,163 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Have cython use python 3 syntax
+# cython: language_level = 3
+
+
+from pylibcugraph._cugraph_c.resource_handle cimport (
+ bool_t,
+ cugraph_resource_handle_t,
+)
+from pylibcugraph._cugraph_c.error cimport (
+ cugraph_error_code_t,
+ cugraph_error_t,
+)
+from pylibcugraph._cugraph_c.array cimport (
+ cugraph_type_erased_device_array_view_t,
+)
+from pylibcugraph._cugraph_c.graph cimport (
+ cugraph_graph_t,
+)
+from pylibcugraph._cugraph_c.graph_functions cimport (
+ cugraph_induced_subgraph_result_t,
+ cugraph_induced_subgraph_get_sources,
+ cugraph_induced_subgraph_get_destinations,
+ cugraph_induced_subgraph_get_edge_weights,
+ cugraph_induced_subgraph_get_subgraph_offsets,
+ cugraph_induced_subgraph_result_free,
+)
+from pylibcugraph._cugraph_c.community_algorithms cimport (
+ cugraph_k_truss_subgraph,
+)
+from pylibcugraph.resource_handle cimport (
+ ResourceHandle,
+)
+from pylibcugraph.graphs cimport (
+ _GPUGraph,
+)
+from pylibcugraph.utils cimport (
+ assert_success,
+ copy_to_cupy_array,
+)
+
+
+def k_truss_subgraph(ResourceHandle resource_handle,
+                     _GPUGraph graph,
+                     size_t k,
+                     bool_t do_expensive_check):
+    """
+    Extract k truss of a graph for a specific k.
+
+    Parameters
+    ----------
+    resource_handle : ResourceHandle
+        Handle to the underlying device resources needed for referencing data
+        and running algorithms.
+
+    graph : SGGraph
+        The input graph.
+
+    k : size_t
+        The desired k to be used for extracting the k-truss subgraph.
+
+    do_expensive_check : bool_t
+        If True, performs more extensive tests on the inputs to ensure
+        validity, at the expense of increased run time.
+
+    Returns
+    -------
+    A tuple of device arrays containing the sources, destinations,
+    edge_weights and edge_offsets.
+
+    Examples
+    --------
+    >>> import pylibcugraph, cupy, numpy
+    >>> srcs = cupy.asarray([0, 1, 1, 3, 1, 4, 2, 0, 2, 1, 2,
+    ...                      3, 3, 4, 3, 5, 4, 5], dtype=numpy.int32)
+    >>> dsts = cupy.asarray([1, 0, 3, 1, 4, 1, 0, 2, 1, 2, 3,
+    ...                      2, 4, 3, 5, 3, 5, 4], dtype=numpy.int32)
+    >>> weights = cupy.asarray(
+    ...     [0.1, 0.1, 2.1, 2.1, 1.1, 1.1, 7.2, 7.2, 2.1, 2.1,
+    ...      1.1, 1.1, 7.2, 7.2, 3.2, 3.2, 6.1, 6.1],
+    ...     dtype=numpy.float32)
+    >>> k = 2
+    >>> resource_handle = pylibcugraph.ResourceHandle()
+    >>> graph_props = pylibcugraph.GraphProperties(
+    ...     is_symmetric=True, is_multigraph=False)
+    >>> G = pylibcugraph.SGGraph(
+    ...     resource_handle, graph_props, srcs, dsts, weights,
+    ...     store_transposed=False, renumber=False, do_expensive_check=False)
+    >>> (sources, destinations, edge_weights, subgraph_offsets) = (
+    ...     pylibcugraph.k_truss_subgraph(resource_handle, G, k, False))
+    >>> sources
+    [0 0 1 1 1 1 2 2 2 3 3 3 3 4 4 4 5 5]
+    >>> destinations
+    [1 2 0 2 3 4 0 1 3 1 2 4 5 1 3 5 3 4]
+    >>> edge_weights
+    [0.1 7.2 0.1 2.1 2.1 1.1 7.2 2.1 1.1 2.1 1.1 7.2 3.2 1.1 7.2 6.1 3.2 6.1]
+    >>> subgraph_offsets
+    [0 18]
+
+    """
+    cdef cugraph_resource_handle_t* c_resource_handle_ptr = \
+        resource_handle.c_resource_handle_ptr
+    cdef cugraph_graph_t* c_graph_ptr = graph.c_graph_ptr
+    cdef cugraph_induced_subgraph_result_t* result_ptr
+    cdef cugraph_error_code_t error_code
+    cdef cugraph_error_t* error_ptr
+
+    error_code = cugraph_k_truss_subgraph(c_resource_handle_ptr,
+                                          c_graph_ptr,
+                                          k,
+                                          do_expensive_check,
+                                          &result_ptr,
+                                          &error_ptr)
+    assert_success(error_code, error_ptr, "cugraph_k_truss_subgraph")
+
+    # Extract individual device array pointers from result and copy to cupy
+    # arrays for returning.
+    cdef cugraph_type_erased_device_array_view_t* sources_ptr = \
+        cugraph_induced_subgraph_get_sources(result_ptr)
+    cdef cugraph_type_erased_device_array_view_t* destinations_ptr = \
+        cugraph_induced_subgraph_get_destinations(result_ptr)
+    cdef cugraph_type_erased_device_array_view_t* edge_weights_ptr = \
+        cugraph_induced_subgraph_get_edge_weights(result_ptr)
+    cdef cugraph_type_erased_device_array_view_t* subgraph_offsets_ptr = \
+        cugraph_induced_subgraph_get_subgraph_offsets(result_ptr)
+
+    # try/finally: the C result must be freed even if one of the copies raises.
+    try:
+        # FIXME: Get ownership of the result data instead of performing a copy
+        # for performance improvement
+        cupy_sources = copy_to_cupy_array(
+            c_resource_handle_ptr, sources_ptr)
+
+        cupy_destinations = copy_to_cupy_array(
+            c_resource_handle_ptr, destinations_ptr)
+
+        if edge_weights_ptr is not NULL:
+            cupy_edge_weights = copy_to_cupy_array(
+                c_resource_handle_ptr, edge_weights_ptr)
+        else:
+            cupy_edge_weights = None
+
+        # FIXME: Should we keep the offsets array or just drop it from the final
+        # solution?
+        cupy_subgraph_offsets = copy_to_cupy_array(
+            c_resource_handle_ptr, subgraph_offsets_ptr)
+    finally:
+        # Free the C result; the cupy arrays above are copies of its data.
+        cugraph_induced_subgraph_result_free(result_ptr)
+
+    return (cupy_sources, cupy_destinations, cupy_edge_weights, cupy_subgraph_offsets)
diff --git a/python/pylibcugraph/pylibcugraph/tests/test_uniform_neighbor_sample.py b/python/pylibcugraph/pylibcugraph/tests/test_uniform_neighbor_sample.py
index 74aa6830d24..ac04635edcf 100644
--- a/python/pylibcugraph/pylibcugraph/tests/test_uniform_neighbor_sample.py
+++ b/python/pylibcugraph/pylibcugraph/tests/test_uniform_neighbor_sample.py
@@ -266,7 +266,7 @@ def test_neighborhood_sampling_large_sg_graph(gpubenchmark):
def test_sample_result():
"""
- Ensure the SampleResult class returns zero-opy cupy arrays and properly
+ Ensure the SampleResult class returns zero-copy cupy arrays and properly
frees device memory when all references to it are gone and it's garbage
collected.
"""
@@ -304,6 +304,8 @@ def test_sample_result():
assert isinstance(destinations, cp.ndarray)
assert isinstance(indices, cp.ndarray)
+ print("sources:", destinations)
+
# Delete the SampleResult instance. This *should not* free the device
# memory yet since the variables sources, destinations, and indices are
# keeping the refcount >0.
diff --git a/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx
index bc2aa9205f1..ce6493c38f5 100644
--- a/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx
+++ b/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx
@@ -38,6 +38,7 @@ from pylibcugraph._cugraph_c.graph cimport (
from pylibcugraph._cugraph_c.algorithms cimport (
cugraph_sample_result_t,
cugraph_prior_sources_behavior_t,
+ cugraph_compression_type_t,
cugraph_sampling_options_t,
cugraph_sampling_options_create,
cugraph_sampling_options_free,
@@ -46,7 +47,8 @@ from pylibcugraph._cugraph_c.algorithms cimport (
cugraph_sampling_set_prior_sources_behavior,
cugraph_sampling_set_dedupe_sources,
cugraph_sampling_set_renumber_results,
-
+ cugraph_sampling_set_compress_per_hop,
+ cugraph_sampling_set_compression_type,
)
from pylibcugraph._cugraph_c.sampling_algorithms cimport (
cugraph_uniform_neighbor_sample,
@@ -73,6 +75,7 @@ from pylibcugraph._cugraph_c.random cimport (
from pylibcugraph.random cimport (
CuGraphRandomState
)
+import warnings
# TODO accept cupy/numpy random state in addition to raw seed.
def uniform_neighbor_sample(ResourceHandle resource_handle,
@@ -90,7 +93,10 @@ def uniform_neighbor_sample(ResourceHandle resource_handle,
deduplicate_sources=False,
return_hops=False,
renumber=False,
- random_state=None):
+ compression='COO',
+ compress_per_hop=False,
+ random_state=None,
+ return_dict=False,):
"""
Does neighborhood sampling, which samples nodes from a graph based on the
current node's neighbors, with a corresponding fanout value at each hop.
@@ -153,11 +159,27 @@ def uniform_neighbor_sample(ResourceHandle resource_handle,
If True, will renumber the sources and destinations on a
per-batch basis and return the renumber map and batch offsets
in additional to the standard returns.
+
+ compression: str (Optional)
+ Options: COO (default), CSR, CSC, DCSR, DCSC
+ Sets the compression format for the returned samples.
+
+ compress_per_hop: bool (Optional)
+ If False (default), will create a compressed edgelist for the
+ entire batch.
+ If True, will create a separate compressed edgelist per hop within
+ a batch.
random_state: int (Optional)
Random state to use when generating samples. Optional argument,
defaults to a hash of process id, time, and hostname.
(See pylibcugraph.random.CuGraphRandomState)
+
+ return_dict: bool (Optional)
+ Whether to return a dictionary instead of a tuple.
+ Optional argument, defaults to False, returning a tuple.
+ This argument will eventually be deprecated in favor
+ of always returning a dictionary.
Returns
-------
@@ -173,13 +195,16 @@ def uniform_neighbor_sample(ResourceHandle resource_handle,
the renumber map for each batch starts).
"""
- cdef cugraph_resource_handle_t* c_resource_handle_ptr = \
+ cdef cugraph_resource_handle_t* c_resource_handle_ptr = (
resource_handle.c_resource_handle_ptr
+ )
+
cdef cugraph_graph_t* c_graph_ptr = input_graph.c_graph_ptr
cdef bool_t c_deduplicate_sources = deduplicate_sources
cdef bool_t c_return_hops = return_hops
cdef bool_t c_renumber = renumber
+ cdef bool_t c_compress_per_hop = compress_per_hop
assert_CAI_type(start_list, "start_list")
assert_CAI_type(batch_id_list, "batch_id_list", True)
@@ -269,6 +294,23 @@ def uniform_neighbor_sample(ResourceHandle resource_handle,
f'Invalid option {prior_sources_behavior}'
' for prior sources behavior'
)
+
+ cdef cugraph_compression_type_t compression_behavior_e
+ if compression is None or compression == 'COO':
+ compression_behavior_e = cugraph_compression_type_t.COO
+ elif compression == 'CSR':
+ compression_behavior_e = cugraph_compression_type_t.CSR
+ elif compression == 'CSC':
+ compression_behavior_e = cugraph_compression_type_t.CSC
+ elif compression == 'DCSR':
+ compression_behavior_e = cugraph_compression_type_t.DCSR
+ elif compression == 'DCSC':
+ compression_behavior_e = cugraph_compression_type_t.DCSC
+ else:
+ raise ValueError(
+ f'Invalid option {compression}'
+ ' for compression type'
+ )
cdef cugraph_sampling_options_t* sampling_options
error_code = cugraph_sampling_options_create(&sampling_options, &error_ptr)
@@ -279,6 +321,8 @@ def uniform_neighbor_sample(ResourceHandle resource_handle,
cugraph_sampling_set_dedupe_sources(sampling_options, c_deduplicate_sources)
cugraph_sampling_set_prior_sources_behavior(sampling_options, prior_sources_behavior_e)
cugraph_sampling_set_renumber_results(sampling_options, c_renumber)
+ cugraph_sampling_set_compression_type(sampling_options, compression_behavior_e)
+ cugraph_sampling_set_compress_per_hop(sampling_options, c_compress_per_hop)
error_code = cugraph_uniform_neighbor_sample(
c_resource_handle_ptr,
@@ -311,26 +355,74 @@ def uniform_neighbor_sample(ResourceHandle resource_handle,
# Get cupy "views" of the individual arrays to return. These each increment
# the refcount on the SamplingResult instance which will keep the data alive
# until all references are removed and the GC runs.
+ # TODO Return everything that isn't null in release 23.12
if with_edge_properties:
- cupy_sources = result.get_sources()
- cupy_destinations = result.get_destinations()
+ cupy_majors = result.get_majors()
+ cupy_major_offsets = result.get_major_offsets()
+ cupy_minors = result.get_minors()
cupy_edge_weights = result.get_edge_weights()
cupy_edge_ids = result.get_edge_ids()
cupy_edge_types = result.get_edge_types()
cupy_batch_ids = result.get_batch_ids()
- cupy_offsets = result.get_offsets()
- cupy_hop_ids = result.get_hop_ids()
+ cupy_label_hop_offsets = result.get_label_hop_offsets()
if renumber:
cupy_renumber_map = result.get_renumber_map()
cupy_renumber_map_offsets = result.get_renumber_map_offsets()
- return (cupy_sources, cupy_destinations, cupy_edge_weights, cupy_edge_ids, cupy_edge_types, cupy_batch_ids, cupy_offsets, cupy_hop_ids, cupy_renumber_map, cupy_renumber_map_offsets)
+ # TODO drop the placeholder for hop ids in release 23.12
+ if return_dict:
+ return {
+ 'major_offsets': cupy_major_offsets,
+ 'majors': cupy_majors,
+ 'minors': cupy_minors,
+ 'weight': cupy_edge_weights,
+ 'edge_id': cupy_edge_ids,
+ 'edge_type': cupy_edge_types,
+ 'batch_id': cupy_batch_ids,
+ 'label_hop_offsets': cupy_label_hop_offsets,
+ 'hop_id': None,
+ 'renumber_map': cupy_renumber_map,
+ 'renumber_map_offsets': cupy_renumber_map_offsets
+ }
+ else:
+ cupy_majors = cupy_major_offsets if cupy_majors is None else cupy_majors
+ return (cupy_majors, cupy_minors, cupy_edge_weights, cupy_edge_ids, cupy_edge_types, cupy_batch_ids, cupy_label_hop_offsets, None, cupy_renumber_map, cupy_renumber_map_offsets)
else:
- return (cupy_sources, cupy_destinations, cupy_edge_weights, cupy_edge_ids, cupy_edge_types, cupy_batch_ids, cupy_offsets, cupy_hop_ids)
+ cupy_hop_ids = result.get_hop_ids() # FIXME remove this
+ if return_dict:
+ return {
+ 'major_offsets': cupy_major_offsets,
+ 'majors': cupy_majors,
+ 'minors': cupy_minors,
+ 'weight': cupy_edge_weights,
+ 'edge_id': cupy_edge_ids,
+ 'edge_type': cupy_edge_types,
+ 'batch_id': cupy_batch_ids,
+ 'label_hop_offsets': cupy_label_hop_offsets,
+ 'hop_id': cupy_hop_ids,
+ }
+ else:
+ cupy_majors = cupy_major_offsets if cupy_majors is None else cupy_majors
+ return (cupy_majors, cupy_minors, cupy_edge_weights, cupy_edge_ids, cupy_edge_types, cupy_batch_ids, cupy_label_hop_offsets, cupy_hop_ids)
else:
+ # TODO this is deprecated, remove it in release 23.12
+ warnings.warn(
+ "Calling uniform_neighbor_sample with the 'with_edge_properties' argument is deprecated."
+ " Starting in release 23.12, this argument will be removed in favor of behaving like the "
+ "with_edge_properties=True option, returning whatever properties are in the graph.",
+ FutureWarning,
+ )
+
cupy_sources = result.get_sources()
cupy_destinations = result.get_destinations()
cupy_indices = result.get_indices()
- return (cupy_sources, cupy_destinations, cupy_indices)
+ if return_dict:
+ return {
+ 'sources': cupy_sources,
+ 'destinations': cupy_destinations,
+ 'indices': cupy_indices
+ }
+ else:
+ return (cupy_sources, cupy_destinations, cupy_indices)