diff --git a/.dockerignore b/.dockerignore index 6d29c8e919e..c274b811f6c 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,2 +1,2 @@ # Ignore cmake builds from local machine that might have occured before attempting Docker build. Including these files will cause CMake cache conflict issues -/cpp/build \ No newline at end of file +/cpp/build diff --git a/.github/workflows/add-to-project.yml b/.github/workflows/add-to-project.yml index bd4d10bcb54..13ad5079cbc 100644 --- a/.github/workflows/add-to-project.yml +++ b/.github/workflows/add-to-project.yml @@ -4,7 +4,7 @@ on: issues: types: - opened - + pull_request_target: types: - opened diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6b7ff14417c..3d893e0e562 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,11 +5,13 @@ exclude: '^thirdparty' repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: check-added-large-files - id: debug-statements + - id: end-of-file-fixer - id: mixed-line-ending + - id: trailing-whitespace - repo: https://github.com/psf/black rev: 22.10.0 hooks: diff --git a/benchmarks/cugraph-dgl/pytest-based/README.MD b/benchmarks/cugraph-dgl/pytest-based/README.MD index 9e73d0b90c7..ff696ffcca3 100755 --- a/benchmarks/cugraph-dgl/pytest-based/README.MD +++ b/benchmarks/cugraph-dgl/pytest-based/README.MD @@ -1,10 +1,10 @@ -## Run Benchmarks +## Run Benchmarks -#### SG +#### SG ``` pytest bench_cugraph_dgl_uniform_neighbor_sample.py -k "SG and fanout_10_25 and rmat_24_4" --benchmark-save='1_rmat_24_4.json' ``` -#### MG +#### MG ``` DASK_NUM_WORKERS=2 pytest bench_cugraph_dgl_uniform_neighbor_sample.py -k "MG and fanout_10_25 and rmat_24_16" --benchmark-save='2_rmat_24_8.json' diff --git a/benchmarks/cugraph/pytest-based/bench_cugraph_uniform_neighbor_sample.py b/benchmarks/cugraph/pytest-based/bench_cugraph_uniform_neighbor_sample.py index 157c64b0b20..face22c9283 100644 --- 
a/benchmarks/cugraph/pytest-based/bench_cugraph_uniform_neighbor_sample.py +++ b/benchmarks/cugraph/pytest-based/bench_cugraph_uniform_neighbor_sample.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -39,7 +39,7 @@ uniform_neighbor_sample, ) from cugraph.generators import rmat -from cugraph.experimental import datasets +from cugraph import datasets from cugraph.dask import uniform_neighbor_sample as uniform_neighbor_sample_mg from cugraph_benchmarking import params diff --git a/benchmarks/cugraph/standalone/bulk_sampling/README.md b/benchmarks/cugraph/standalone/bulk_sampling/README.md index bb01133c52f..2d09466fb2f 100644 --- a/benchmarks/cugraph/standalone/bulk_sampling/README.md +++ b/benchmarks/cugraph/standalone/bulk_sampling/README.md @@ -16,21 +16,21 @@ Required: the samples will be written to a new folder in /home/samples that contains information about the sampling run as well as the time of the run. - + --dataset_root The folder where datasets are stored. Uses the format described in the input format section. - + --datasets Comma-separated list of datasets; can specify ogb or rmat (i.e. ogb_papers100M[2],rmat_22_16). For ogb datasets, can provide replication factor using brackets. Will attempt to read from dataset_root/. - + Optional: --fanouts Comma-separated list of fanout values (i.e. [10, 25]). The default fanout is [10, 25]. - + --batch_sizes Comma-separated list of batch sizes (i.e. 500, 1000). Defaults to "512,1024" @@ -39,7 +39,7 @@ Optional: Comma-separated list of seeds per call. Controls the number of input seed vertices processed in a single sampling call. Defaults to 524288 - + --reverse_edges Whether to reverse the edges of the input edgelist. Should be set to False for PyG and True for DGL. Defaults to False (PyG). 
@@ -52,8 +52,8 @@ Optional: --random_seed Seed for random number generation. Defaults to '62' - - + + ### Input Format The script expects its input data in the following format: ``` @@ -159,4 +159,4 @@ GPUs per node is currently unsupported by this script but should be possible in ### Output The results of training will be outputted to the logs directory with an `output.txt` file for each worker. -These will be overwritten upon each run. Accuracy is only reported on rank 0. \ No newline at end of file +These will be overwritten upon each run. Accuracy is only reported on rank 0. diff --git a/benchmarks/cugraph/standalone/bulk_sampling/run_sampling.sh b/benchmarks/cugraph/standalone/bulk_sampling/run_sampling.sh index 41792c0b63a..1b3085dcc9a 100644 --- a/benchmarks/cugraph/standalone/bulk_sampling/run_sampling.sh +++ b/benchmarks/cugraph/standalone/bulk_sampling/run_sampling.sh @@ -67,7 +67,7 @@ handleTimeout 120 python ${MG_UTILS_DIR}/wait_for_workers.py \ DASK_STARTUP_ERRORCODE=$LAST_EXITCODE -echo $SLURM_NODEID +echo $SLURM_NODEID if [[ $SLURM_NODEID == 0 ]]; then echo "Launching Python Script" python ${SCRIPTS_DIR}/cugraph_bulk_sampling.py \ @@ -78,7 +78,7 @@ if [[ $SLURM_NODEID == 0 ]]; then --batch_sizes $BATCH_SIZE \ --seeds_per_call_opts "524288" \ --num_epochs $NUM_EPOCHS \ - --random_seed 42 + --random_seed 42 echo "DONE" > ${SAMPLES_DIR}/status.txt fi @@ -108,4 +108,4 @@ sleep 2 if [[ $SLURM_NODEID == 0 ]]; then rm ${SAMPLES_DIR}/status.txt -fi \ No newline at end of file +fi diff --git a/benchmarks/cugraph/standalone/bulk_sampling/run_train_job.sh b/benchmarks/cugraph/standalone/bulk_sampling/run_train_job.sh index 977745a9593..27ae0dc7788 100755 --- a/benchmarks/cugraph/standalone/bulk_sampling/run_train_job.sh +++ b/benchmarks/cugraph/standalone/bulk_sampling/run_train_job.sh @@ -16,7 +16,7 @@ #SBATCH -p luna #SBATCH -J datascience_rapids_cugraphgnn-papers:bulkSamplingPyG #SBATCH -N 1 -#SBATCH -t 00:25:00 +#SBATCH -t 00:25:00 
CONTAINER_IMAGE=${CONTAINER_IMAGE:="please_specify_container"} SCRIPTS_DIR=$(pwd) @@ -81,4 +81,3 @@ srun \ --fanout $FANOUT \ --replication_factor $REPLICATION_FACTOR \ --num_epochs $NUM_EPOCHS - diff --git a/benchmarks/dgl/README.md b/benchmarks/dgl/README.md index c24e6a6df18..82762588ee8 100644 --- a/benchmarks/dgl/README.md +++ b/benchmarks/dgl/README.md @@ -13,4 +13,4 @@ pytest dgl_benchmark.py::bench_dgl_pure_gpu ## For UVA Benchmarks ``` pytest dgl_benchmark.py::bench_dgl_uva -``` \ No newline at end of file +``` diff --git a/benchmarks/shared/build_cugraph_ucx/README.MD b/benchmarks/shared/build_cugraph_ucx/README.MD index b18adfd3b03..06ab37a048d 100644 --- a/benchmarks/shared/build_cugraph_ucx/README.MD +++ b/benchmarks/shared/build_cugraph_ucx/README.MD @@ -6,10 +6,10 @@ docker build -f cugraph_ucx.dockerfile . -t cugraph_ucx docker run --privileged -it --gpus=all --net=host cugraph_ucx /bin/bash #### Client Bandwidth Test -python3 test_client_bandwidth.py +python3 test_client_bandwidth.py ```bash -(base) root@exp02:/home# python3 test_client_bandwidth.py +(base) root@exp02:/home# python3 test_client_bandwidth.py 2022-12-19 13:31:30,867 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize 2022-12-19 13:31:30,867 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize 2022-12-19 13:31:30,891 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize @@ -30,8 +30,8 @@ Bandwidth = 5.2037 gb/s #### Sampling Test python3 test_cugraph_sampling.py ```bash -test_client_bandwidth.py test_cugraph_sampling.py -(base) root@exp02:/home# python3 test_cugraph_sampling.py +test_client_bandwidth.py test_cugraph_sampling.py +(base) root@exp02:/home# python3 test_cugraph_sampling.py [1671456769.722931] [exp02:93 :0] parser.c:1989 UCX WARN unused environment variable: UCX_MEMTYPE_CACHE (maybe: UCX_MEMTYPE_CACHE?) 
[1671456769.722931] [exp02:93 :0] parser.c:1989 UCX WARN (set UCX_WARN_UNUSED_ENV_VARS=n to suppress this warning) 2022-12-19 13:32:56,228 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize @@ -54,4 +54,4 @@ Sampling 1,000 took = 69.15879249572754 ms Sampling 10,000 took = 89.63620662689209 ms Sampling 100,000 took = 135.9888792037964 ms ----------------------------------------Completed Test---------------------------------------- -``` \ No newline at end of file +``` diff --git a/benchmarks/shared/build_cugraph_ucx/build-ucx.sh b/benchmarks/shared/build_cugraph_ucx/build-ucx.sh index 758f9b57231..df9290cdfe6 100644 --- a/benchmarks/shared/build_cugraph_ucx/build-ucx.sh +++ b/benchmarks/shared/build_cugraph_ucx/build-ucx.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 set -ex @@ -16,4 +16,4 @@ mkdir build-linux && cd build-linux --enable-mt --enable-numa --with-gnu-ld --with-rdmacm --with-verbs \ --with-cuda=${CUDA_HOME} \ ${CONFIGURE_ARGS} -make -j install \ No newline at end of file +make -j install diff --git a/benchmarks/shared/build_cugraph_ucx/cugraph_ucx.dockerfile b/benchmarks/shared/build_cugraph_ucx/cugraph_ucx.dockerfile index 452801d9774..f51bee4c320 100644 --- a/benchmarks/shared/build_cugraph_ucx/cugraph_ucx.dockerfile +++ b/benchmarks/shared/build_cugraph_ucx/cugraph_ucx.dockerfile @@ -55,7 +55,7 @@ RUN gpuci_mamba_retry install -y -c pytorch -c rapidsai-nightly -c rapidsai -c c tqdm -# Build ucx from source with IB support +# Build ucx from source with IB support # on 1.14.x RUN conda remove --force -y ucx ucx-proc diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 30a1c98c106..9de1750de81 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -54,12 +54,12 @@ cd "${package_dir}" python -m pip wheel . 
-w dist -vvv --no-deps --disable-pip-version-check -# pure-python packages should not have auditwheel run on them. +# pure-python packages should be marked as pure, and not have auditwheel run on them. if [[ ${package_name} == "nx-cugraph" ]] || \ [[ ${package_name} == "cugraph-dgl" ]] || \ [[ ${package_name} == "cugraph-pyg" ]] || \ [[ ${package_name} == "cugraph-equivariant" ]]; then - RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 dist + RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 dist else mkdir -p final_dist python -m auditwheel repair -w final_dist dist/* diff --git a/ci/test.sh b/ci/test.sh index b3adc80c593..f20fc40f85a 100755 --- a/ci/test.sh +++ b/ci/test.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -105,7 +105,7 @@ if hasArg "--run-python-tests"; then # rmat is not tested because of MG testing pytest --cache-clear --junitxml=${CUGRAPH_ROOT}/junit-cugraph-pytests.xml -v --cov-config=.coveragerc --cov=cugraph_pyg --cov-report=xml:${WORKSPACE}/python/cugraph_pyg/cugraph-coverage.xml --cov-report term --ignore=raft --ignore=tests/mg --ignore=tests/int --ignore=tests/generators --benchmark-disable echo "Ran Python pytest for cugraph_pyg : return code was: $?, test script exit code is now: $EXITCODE" - + echo "Python pytest for cugraph-service (single-GPU only)..." 
cd ${CUGRAPH_ROOT}/python/cugraph-service pytest -sv --cache-clear --junitxml=${CUGRAPH_ROOT}/junit-cugraph-service-pytests.xml --benchmark-disable -k "not mg" ./tests diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh index 8c5832e412f..158704e08d1 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel.sh @@ -11,8 +11,13 @@ python_package_name=$(echo ${package_name}|sed 's/-/_/g') mkdir -p ./dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +# nx-cugraph is a pure wheel, which is part of generating the download path +if [[ "${package_name}" == "nx-cugraph" ]]; then + RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist +else + RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +fi # use 'ls' to expand wildcard before adding `[extra]` requires for pip -RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist # pip creates wheels using python package names python -m pip install $(ls ./dist/${python_package_name}*.whl)[test] diff --git a/ci/test_wheel_cugraph-dgl.sh b/ci/test_wheel_cugraph-dgl.sh index 367b169bd13..827ad487115 100755 --- a/ci/test_wheel_cugraph-dgl.sh +++ b/ci/test_wheel_cugraph-dgl.sh @@ -17,7 +17,7 @@ RAPIDS_PY_WHEEL_NAME="cugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f python -m pip install ./local-deps/*.whl # use 'ls' to expand wildcard before adding `[extra]` requires for pip -RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist # pip creates wheels using python package names python -m pip install $(ls ./dist/${python_package_name}*.whl)[test] diff --git a/ci/test_wheel_cugraph-equivariant.sh b/ci/test_wheel_cugraph-equivariant.sh index f054780b03a..cb952055f06 100755 
--- a/ci/test_wheel_cugraph-equivariant.sh +++ b/ci/test_wheel_cugraph-equivariant.sh @@ -12,7 +12,7 @@ mkdir -p ./dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" # use 'ls' to expand wildcard before adding `[extra]` requires for pip -RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist # pip creates wheels using python package names python -m pip install $(ls ./dist/${python_package_name}*.whl)[test] diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 6e44e1ad958..50cbfb3e1fe 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -17,7 +17,7 @@ RAPIDS_PY_WHEEL_NAME="cugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f python -m pip install ./local-deps/*.whl # use 'ls' to expand wildcard before adding `[extra]` requires for pip -RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist # pip creates wheels using python package names python -m pip install $(ls ./dist/${python_package_name}*.whl)[test] diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c812cd8e4b3..a3392627fb8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -423,6 +423,7 @@ add_library(cugraph_c src/c_api/core_number.cpp src/c_api/core_result.cpp src/c_api/extract_ego.cpp + src/c_api/ecg.cpp src/c_api/k_core.cpp src/c_api/hierarchical_clustering_result.cpp src/c_api/induced_subgraph.cpp diff --git a/cpp/cmake/thirdparty/get_nccl.cmake b/cpp/cmake/thirdparty/get_nccl.cmake index 118ae377049..bb2500c67e2 100644 --- a/cpp/cmake/thirdparty/get_nccl.cmake +++ b/cpp/cmake/thirdparty/get_nccl.cmake @@ -1,5 +1,5 @@ 
#============================================================================= -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -32,7 +32,3 @@ function(find_and_configure_nccl) endfunction() find_and_configure_nccl() - - - - diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md index b369183a262..50d4bc63e9f 100644 --- a/cpp/docs/DEVELOPER_GUIDE.md +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -1,6 +1,6 @@ # cuGraph C++ Developer Guide -This document serves as a guide for contributors to cuGraph C++ code. Developers should also refer +This document serves as a guide for contributors to cuGraph C++ code. Developers should also refer to these additional files for further documentation of cuGraph best practices. * [Documentation Guide](TODO) for guidelines on documenting cuGraph code. @@ -9,7 +9,7 @@ to these additional files for further documentation of cuGraph best practices. # Overview -cuGraph includes a C++ library that provides GPU-accelerated graph algorithms for processing +cuGraph includes a C++ library that provides GPU-accelerated graph algorithms for processing sparse graphs. ## Lexicon @@ -27,12 +27,12 @@ and weight[i]. # Directory Structure and File Naming -External/public cuGraph APIs are grouped based on functionality into an appropriately titled -header file in `cugraph/cpp/include/`. For example, `cugraph/cpp/include/graph.hpp` -contains the definition of the (legacy) graph objects. Note the `.hpp` +External/public cuGraph APIs are grouped based on functionality into an appropriately titled +header file in `cugraph/cpp/include/`. For example, `cugraph/cpp/include/graph.hpp` +contains the definition of the (legacy) graph objects. Note the `.hpp` file extension used to indicate a C++ header file. -Header files should use the `#pragma once` include guard. 
+Header files should use the `#pragma once` include guard. ## File extensions @@ -50,8 +50,8 @@ algorithm APIs with a device execution policy (always `rmm::exec_policy` in cuGr ## Code and Documentation Style and Formatting -cuGraph code uses [snake_case](https://en.wikipedia.org/wiki/Snake_case) for all names except in a -few cases: unit tests and test case names may use Pascal case, aka +cuGraph code uses [snake_case](https://en.wikipedia.org/wiki/Snake_case) for all names except in a +few cases: unit tests and test case names may use Pascal case, aka [UpperCamelCase](https://en.wikipedia.org/wiki/Camel_case). We do not use [Hungarian notation](https://en.wikipedia.org/wiki/Hungarian_notation), except for the following examples: * device data variables should be prefaced by d_ if it makes the intent clearer @@ -67,7 +67,7 @@ void algorithm_function(graph_t const &g) } template -class utility_class +class utility_class { ... private: @@ -75,9 +75,9 @@ class utility_class } ``` -C++ formatting is enforced using `clang-format`. You should configure `clang-format` on your -machine to use the `cugraph/cpp/.clang-format` configuration file, and run `clang-format` on all -changed code before committing it. The easiest way to do this is to configure your editor to +C++ formatting is enforced using `clang-format`. You should configure `clang-format` on your +machine to use the `cugraph/cpp/.clang-format` configuration file, and run `clang-format` on all +changed code before committing it. The easiest way to do this is to configure your editor to "format on save". Aspects of code style not discussed in this document and not automatically enforceable are typically @@ -85,10 +85,10 @@ caught during code review, or not enforced. ### C++ Guidelines -In general, we recommend following -[C++ Core Guidelines](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines). 
We also -recommend watching Sean Parent's [C++ Seasoning talk](https://www.youtube.com/watch?v=W2tWOdzgXHA), -and we try to follow his rules: "No raw loops. No raw pointers. No raw synchronization primitives." +In general, we recommend following +[C++ Core Guidelines](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines). We also +recommend watching Sean Parent's [C++ Seasoning talk](https://www.youtube.com/watch?v=W2tWOdzgXHA), +and we try to follow his rules: "No raw loops. No raw pointers. No raw synchronization primitives." * Prefer algorithms from STL and Thrust to raw loops. * Prefer cugraph and RMM to raw pointers and raw memory allocation. @@ -99,18 +99,18 @@ Documentation is discussed in the [Documentation Guide](TODO). The following guidelines apply to organizing `#include` lines. - * Group includes by library (e.g. cuGraph, RMM, Thrust, STL). `clang-format` will respect the + * Group includes by library (e.g. cuGraph, RMM, Thrust, STL). `clang-format` will respect the groupings and sort the individual includes within a group lexicographically. * Separate groups by a blank line. - * Order the groups from "nearest" to "farthest". In other words, local includes, then includes - from other RAPIDS libraries, then includes from related libraries, like ``, then - includes from dependencies installed with cuGraph, and then standard headers (for example ``, + * Order the groups from "nearest" to "farthest". In other words, local includes, then includes + from other RAPIDS libraries, then includes from related libraries, like ``, then + includes from dependencies installed with cuGraph, and then standard headers (for example ``, ``). * Use <> instead of "" unless the header is in the same directory as the source file. * Tools like `clangd` often auto-insert includes when they can, but they usually get the grouping and brackets wrong. - * Always check that includes are only necessary for the file in which they are included. 
- Try to avoid excessive including especially in header files. Double check this when you remove + * Always check that includes are only necessary for the file in which they are included. + Try to avoid excessive including especially in header files. Double check this when you remove code. # cuGraph Data Structures @@ -120,14 +120,14 @@ data structures you will use when developing cuGraph code. ## Views and Ownership -Resource ownership is an essential concept in cuGraph. In short, an "owning" object owns a -resource (such as device memory). It acquires that resource during construction and releases the +Resource ownership is an essential concept in cuGraph. In short, an "owning" object owns a +resource (such as device memory). It acquires that resource during construction and releases the resource in destruction ([RAII](https://en.cppreference.com/w/cpp/language/raii)). A "non-owning" object does not own resources. Any class in cuGraph with the `*_view` suffix is non-owning. ## `rmm::device_memory_resource` -cuGraph allocates all device memory via RMM memory resources (MR). See the +cuGraph allocates all device memory via RMM memory resources (MR). See the [RMM documentation](https://github.com/rapidsai/rmm/blob/main/README.md) for details. ## Streams @@ -142,29 +142,29 @@ cuGraph code generally eschews raw pointers and direct memory allocation. Use RM use `device_memory_resource`(*)s for device memory allocation with automated lifetime management. #### `rmm::device_buffer` -Allocates a specified number of bytes of untyped, uninitialized device memory using a -`device_memory_resource`. If no resource is explicitly provided, uses -`rmm::mr::get_current_device_resource()`. +Allocates a specified number of bytes of untyped, uninitialized device memory using a +`device_memory_resource`. If no resource is explicitly provided, uses +`rmm::mr::get_current_device_resource()`. -`rmm::device_buffer` is movable and copyable on a stream. 
A copy performs a deep copy of the -`device_buffer`'s device memory on the specified stream, whereas a move moves ownership of the +`rmm::device_buffer` is movable and copyable on a stream. A copy performs a deep copy of the +`device_buffer`'s device memory on the specified stream, whereas a move moves ownership of the device memory from one `device_buffer` to another. ```c++ -// Allocates at least 100 bytes of uninitialized device memory +// Allocates at least 100 bytes of uninitialized device memory // using the specified resource and stream -rmm::device_buffer buff(100, stream, mr); +rmm::device_buffer buff(100, stream, mr); void * raw_data = buff.data(); // Raw pointer to underlying device memory // Deep copies `buff` into `copy` on `stream` -rmm::device_buffer copy(buff, stream); +rmm::device_buffer copy(buff, stream); // Moves contents of `buff` into `moved_to` -rmm::device_buffer moved_to(std::move(buff)); +rmm::device_buffer moved_to(std::move(buff)); custom_memory_resource *mr...; // Allocates 100 bytes from the custom_memory_resource -rmm::device_buffer custom_buff(100, mr, stream); +rmm::device_buffer custom_buff(100, mr, stream); ``` #### `rmm::device_uvector` @@ -173,7 +173,7 @@ Similar to a `rmm::device_vector`, allocates a contiguous set of elements in dev key differences: - As an optimization, elements are uninitialized and no synchronization occurs at construction. This limits the types `T` to trivially copyable types. -- All operations are stream ordered (i.e., they accept a `cuda_stream_view` specifying the stream +- All operations are stream ordered (i.e., they accept a `cuda_stream_view` specifying the stream on which the operation is performed). 
## Namespaces @@ -188,12 +188,12 @@ namespace cugraph{ ### Internal -Many functions are not meant for public use, so place them in either the `detail` or an *anonymous* +Many functions are not meant for public use, so place them in either the `detail` or an *anonymous* namespace, depending on the situation. #### `detail` namespace -Functions or objects that will be used across *multiple* translation units (i.e., source files), +Functions or objects that will be used across *multiple* translation units (i.e., source files), should be exposed in an internal header file and placed in the `detail` namespace. Example: ```c++ @@ -207,7 +207,7 @@ void reusable_helper_function(...); #### Anonymous namespace -Functions or objects that will only be used in a *single* translation unit should be defined in an +Functions or objects that will only be used in a *single* translation unit should be defined in an *anonymous* namespace in the source file where it is used. Example: ```c++ @@ -217,12 +217,12 @@ void isolated_helper_function(...); } // anonymous namespace ``` -[**Anonymous namespaces should *never* be used in a header file.**](https://wiki.sei.cmu.edu/confluence/display/cplusplus/DCL59-CPP.+Do+not+define+an+unnamed+namespace+in+a+header+file) +[**Anonymous namespaces should *never* be used in a header file.**](https://wiki.sei.cmu.edu/confluence/display/cplusplus/DCL59-CPP.+Do+not+define+an+unnamed+namespace+in+a+header+file) # Error Handling -cuGraph follows conventions (and provides utilities) enforcing compile-time and run-time -conditions and detecting and handling CUDA errors. Communication of errors is always via C++ +cuGraph follows conventions (and provides utilities) enforcing compile-time and run-time +conditions and detecting and handling CUDA errors. Communication of errors is always via C++ exceptions. 
## Runtime Conditions @@ -234,13 +234,13 @@ Example usage: CUGRAPH_EXPECTS(lhs.type() == rhs.type(), "Column type mismatch"); ``` -The first argument is the conditional expression expected to resolve to `true` under normal -conditions. If the conditional evaluates to `false`, then an error has occurred and an instance of `cugraph::logic_error` is thrown. The second argument to `CUGRAPH_EXPECTS` is a short description of the -error that has occurred and is used for the exception's `what()` message. +The first argument is the conditional expression expected to resolve to `true` under normal +conditions. If the conditional evaluates to `false`, then an error has occurred and an instance of `cugraph::logic_error` is thrown. The second argument to `CUGRAPH_EXPECTS` is a short description of the +error that has occurred and is used for the exception's `what()` message. -There are times where a particular code path, if reached, should indicate an error no matter what. -For example, often the `default` case of a `switch` statement represents an invalid alternative. -Use the `CUGRAPH_FAIL` macro for such errors. This is effectively the same as calling +There are times where a particular code path, if reached, should indicate an error no matter what. +For example, often the `default` case of a `switch` statement represents an invalid alternative. +Use the `CUGRAPH_FAIL` macro for such errors. This is effectively the same as calling `CUGRAPH_EXPECTS(false, reason)`. Example: @@ -250,8 +250,8 @@ CUGRAPH_FAIL("This code path should not be reached."); ### CUDA Error Checking -Use the `CUDA_TRY` macro to check for the successful completion of CUDA runtime API functions. This -macro throws a `cugraph::cuda_error` exception if the CUDA API return value is not `cudaSuccess`. The +Use the `CUDA_TRY` macro to check for the successful completion of CUDA runtime API functions. This +macro throws a `cugraph::cuda_error` exception if the CUDA API return value is not `cudaSuccess`. 
The thrown exception includes a description of the CUDA error code in it's `what()` message. Example: diff --git a/cpp/doxygen/main_page.md b/cpp/doxygen/main_page.md index 3ff51a5aa43..287b7650286 100644 --- a/cpp/doxygen/main_page.md +++ b/cpp/doxygen/main_page.md @@ -1,3 +1,3 @@ # libcugraph -libcugraph is a C++ GPU Accelerated Graph Algorithm library. +libcugraph is a C++ GPU Accelerated Graph Algorithm library. diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index cd2dafc5217..5c29604a5a7 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -2135,6 +2135,172 @@ rmm::device_uvector overlap_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check = false); +/** + * @brief Compute Jaccard all pairs similarity coefficient + * + * Similarity is computed for all pairs of vertices. Note that in a sparse + * graph, many of the vertex pairs will have a score of zero. We actually + * compute similarity only for vertices that are two hop neighbors within + * the graph, since vertices that are not two hop neighbors will have + * a score of 0. + * + * If @p vertices is specified we will compute similarity on two hop + * neighbors of the @p vertices. If @p vertices is not specified it will + * compute similarity on all two hop neighbors in the graph. + * + * If @p topk is specified only the top @p topk scoring vertex pairs + * will be returned, if not specified then scores for all computed vertex pairs + * will be returned. + * + * Note the list of two hop neighbors in the entire graph might be a large + * number of vertex pairs. If the graph is dense enough it could be as large + * as the number of vertices squared, which might run out of memory. + * + * @throws cugraph::logic_error when an error occurs. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object. + * @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p + * edge_weight_view.has_value() == true, use the weights associated with the graph. If false, assume + * a weight of 1 for all edges. + * @param vertices optional device span defining the seed vertices. In a multi-gpu context the + * vertices should be local to this GPU. + * @param topk optional specification of how many of the top scoring vertex pairs should be + * returned + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return tuple containing three device vectors (v1, v2, score) of the same length. Corresponding + * elements in the vectors identify a result, v1 identifying a vertex in the graph, v2 identifying + * one of v1's two hop neighbors, and the score identifying the similarity score between v1 and v2. + * If @p topk was specified then the vectors will be no longer than @p topk elements. In a + * multi-gpu context, if @p topk is specified all results will return on GPU rank 0, otherwise they + * will be returned on the local GPU for vertex v1. + */ +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check = false); + +/** + * @brief Compute Sorensen all pairs similarity coefficient + * + * Similarity is computed for all pairs of vertices.
Note that in a sparse + * graph, many of the vertex pairs will have a score of zero. We actually + * compute similarity only for vertices that are two hop neighbors within + * the graph, since vertices that are not two hop neighbors will have + * a score of 0. + * + * If @p vertices is specified we will compute similarity on two hop + * neighbors of the @p vertices. If @p vertices is not specified it will + * compute similarity on all two hop neighbors in the graph. + * + * If @p topk is specified only the top @p topk scoring vertex pairs + * will be returned, if not specified then scores for all computed vertex pairs + * will be returned. + * + * Note the list of two hop neighbors in the entire graph might be a large + * number of vertex pairs. If the graph is dense enough it could be as large + * as the number of vertices squared, which might run out of memory. + * + * @throws cugraph::logic_error when an error occurs. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object. + * @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p + * edge_weight_view.has_value() == true, use the weights associated with the graph. If false, assume + * a weight of 1 for all edges. + * @param vertices optional device span defining the seed vertices. + * @param topk optional specification of how many of the top scoring vertex pairs should be + * returned + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return tuple containing three device vectors (v1, v2, score) of the same length. Corresponding + * elements in the vectors identify a result, v1 identifying a vertex in the graph, v2 identifying + * one of v1's two hop neighbors, and the score identifying the similarity score between v1 and v2. + * If @p topk was specified then the vectors will be no longer than @p topk elements. In a + * multi-gpu context, if @p topk is specified all results will return on GPU rank 0, otherwise they + * will be returned on the local GPU for vertex v1. + */ +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check = false); + +/** + * @brief Compute overlap similarity coefficient + * + * Similarity is computed for all pairs of vertices. Note that in a sparse + * graph, many of the vertex pairs will have a score of zero. We actually + * compute similarity only for vertices that are two hop neighbors within + * the graph, since vertices that are not two hop neighbors will have + * a score of 0. + * + * If @p vertices is specified we will compute similarity on two hop + * neighbors of the @p vertices. If @p vertices is not specified it will + * compute similarity on all two hop neighbors in the graph. + * + * If @p topk is specified only the top @p topk scoring vertex pairs + * will be returned, if not specified then scores for all computed vertex pairs + * will be returned. + * + * Note the list of two hop neighbors in the entire graph might be a large + * number of vertex pairs. If the graph is dense enough it could be as large + * as the number of vertices squared, which might run out of memory. + * + * @throws cugraph::logic_error when an error occurs. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object. + * @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p + * edge_weight_view.has_value() == true, use the weights associated with the graph. If false, assume + * a weight of 1 for all edges. + * @param vertices optional device span defining the seed vertices. + * @param topk optional specification of how many of the top scoring vertex pairs should be + * returned + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return tuple containing three device vectors (v1, v2, score) of the same length. Corresponding + * elements in the vectors identify a result, v1 identifying a vertex in the graph, v2 identifying + * one of v1's two hop neighbors, and the score identifying the similarity score between v1 and v2. + * If @p topk was specified then the vectors will be no longer than @p topk elements. In a + * multi-gpu context, if @p topk is specified all results will return on GPU rank 0, otherwise they + * will be returned on the local GPU for vertex v1.
+ */ +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check = false); + /* * @brief Enumerate K-hop neighbors * diff --git a/cpp/include/cugraph/utilities/misc_utils.cuh b/cpp/include/cugraph/utilities/misc_utils.cuh index 5b66d978b54..d3917a3e851 100644 --- a/cpp/include/cugraph/utilities/misc_utils.cuh +++ b/cpp/include/cugraph/utilities/misc_utils.cuh @@ -37,51 +37,52 @@ namespace cugraph { namespace detail { -template -std::tuple, std::vector> compute_offset_aligned_edge_chunks( +template +std::tuple, std::vector> compute_offset_aligned_element_chunks( raft::handle_t const& handle, - edge_t const* offsets, - vertex_t num_vertices, - edge_t num_edges, - size_t approx_edge_chunk_size) + raft::device_span offsets, + offset_t num_elements, + vertex_t approx_element_chunk_size) { auto search_offset_first = thrust::make_transform_iterator( thrust::make_counting_iterator(size_t{1}), cuda::proclaim_return_type( - [approx_edge_chunk_size] __device__(auto i) { return i * approx_edge_chunk_size; })); - auto num_chunks = (num_edges + approx_edge_chunk_size - 1) / approx_edge_chunk_size; + [approx_element_chunk_size] __device__(auto i) { return i * approx_element_chunk_size; })); + auto num_chunks = (num_elements + approx_element_chunk_size - 1) / approx_element_chunk_size; if (num_chunks > 1) { - rmm::device_uvector d_vertex_offsets(num_chunks - 1, handle.get_stream()); + rmm::device_uvector d_chunk_offsets(num_chunks - 1, handle.get_stream()); thrust::lower_bound(handle.get_thrust_policy(), - offsets, - offsets + num_vertices + 1, + offsets.begin(), + offsets.end(), search_offset_first, - search_offset_first + d_vertex_offsets.size(), - d_vertex_offsets.begin()); - rmm::device_uvector d_edge_offsets(d_vertex_offsets.size(), handle.get_stream()); + 
search_offset_first + d_chunk_offsets.size(), + d_chunk_offsets.begin()); + rmm::device_uvector d_element_offsets(d_chunk_offsets.size(), handle.get_stream()); thrust::gather(handle.get_thrust_policy(), - d_vertex_offsets.begin(), - d_vertex_offsets.end(), - offsets, - d_edge_offsets.begin()); - std::vector h_edge_offsets(num_chunks + 1, edge_t{0}); - h_edge_offsets.back() = num_edges; - raft::update_host( - h_edge_offsets.data() + 1, d_edge_offsets.data(), d_edge_offsets.size(), handle.get_stream()); - std::vector h_vertex_offsets(num_chunks + 1, vertex_t{0}); - h_vertex_offsets.back() = num_vertices; - raft::update_host(h_vertex_offsets.data() + 1, - d_vertex_offsets.data(), - d_vertex_offsets.size(), + d_chunk_offsets.begin(), + d_chunk_offsets.end(), + offsets.begin(), + d_element_offsets.begin()); + std::vector h_element_offsets(num_chunks + 1, offset_t{0}); + h_element_offsets.back() = num_elements; + raft::update_host(h_element_offsets.data() + 1, + d_element_offsets.data(), + d_element_offsets.size(), + handle.get_stream()); + std::vector h_chunk_offsets(num_chunks + 1, vertex_t{0}); + h_chunk_offsets.back() = offsets.size() - 1; + raft::update_host(h_chunk_offsets.data() + 1, + d_chunk_offsets.data(), + d_chunk_offsets.size(), handle.get_stream()); handle.sync_stream(); - return std::make_tuple(h_vertex_offsets, h_edge_offsets); + return std::make_tuple(h_chunk_offsets, h_element_offsets); } else { - return std::make_tuple(std::vector{{0, num_vertices}}, - std::vector{{0, num_edges}}); + return std::make_tuple(std::vector{{0, offsets.size() - 1}}, + std::vector{{0, num_elements}}); } } diff --git a/cpp/include/cugraph_c/community_algorithms.h b/cpp/include/cugraph_c/community_algorithms.h index e8a71a40162..cb3d6b6375a 100644 --- a/cpp/include/cugraph_c/community_algorithms.h +++ b/cpp/include/cugraph_c/community_algorithms.h @@ -177,6 +177,41 @@ double cugraph_hierarchical_clustering_result_get_modularity( */ void 
cugraph_hierarchical_clustering_result_free(cugraph_hierarchical_clustering_result_t* result); +/** + * @brief Compute ECG clustering + * + * @param [in] handle Handle for accessing resources + * @param [in/out] rng_state State of the random number generator, updated with each call + * @param [in] graph Pointer to graph. NOTE: Graph might be modified if the storage + * needs to be transposed + * @param [in] min_weight Minimum edge weight in final graph + * @param [in] ensemble_size The number of Louvain iterations to run + * @param [in] max_level Maximum level in hierarchy for final Louvain + * @param [in] threshold Threshold parameter, defines convergence at each level of hierarchy + * for final Louvain + * @param [in] resolution Resolution parameter (gamma) in modularity formula. + * This changes the size of the communities. Higher resolutions + * lead to more smaller communities, lower resolutions lead to + * fewer larger communities. + * @param [in] do_expensive_check + * A flag to run expensive checks for input arguments (if set to true) + * @param [out] result Output from the Louvain call + * @param [out] error Pointer to an error object storing details of any error. 
Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_ecg(const cugraph_resource_handle_t* handle, + cugraph_rng_state_t* rng_state, + cugraph_graph_t* graph, + double min_weight, + size_t ensemble_size, + size_t max_level, + double threshold, + double resolution, + bool_t do_expensive_check, + cugraph_hierarchical_clustering_result_t** result, + cugraph_error_t** error); + /** * @brief Compute ECG clustering of the given graph * @@ -200,13 +235,13 @@ void cugraph_hierarchical_clustering_result_free(cugraph_hierarchical_clustering * be populated if error code is not CUGRAPH_SUCCESS * @return error code */ -cugraph_error_code_t cugraph_ecg(const cugraph_resource_handle_t* handle, - cugraph_graph_t* graph, - double min_weight, - size_t ensemble_size, - bool_t do_expensive_check, - cugraph_hierarchical_clustering_result_t** result, - cugraph_error_t** error); +cugraph_error_code_t cugraph_legacy_ecg(const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + double min_weight, + size_t ensemble_size, + bool_t do_expensive_check, + cugraph_hierarchical_clustering_result_t** result, + cugraph_error_t** error); /** * @brief Extract ego graphs diff --git a/cpp/include/cugraph_c/graph_functions.h b/cpp/include/cugraph_c/graph_functions.h index 19b69922fa5..8fe1ea0b958 100644 --- a/cpp/include/cugraph_c/graph_functions.h +++ b/cpp/include/cugraph_c/graph_functions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/include/cugraph_c/similarity_algorithms.h b/cpp/include/cugraph_c/similarity_algorithms.h index b8f61b46545..5b8462a1666 100644 --- a/cpp/include/cugraph_c/similarity_algorithms.h +++ b/cpp/include/cugraph_c/similarity_algorithms.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,6 +36,16 @@ typedef struct { int32_t align_; } cugraph_similarity_result_t; +/** + * @ingroup similarity + * @brief Get vertex pair from the similarity result. + * + * @param [in] result The result from a similarity algorithm + * @return vertex pairs + */ +cugraph_vertex_pairs_t* cugraph_similarity_result_get_vertex_pairs( + cugraph_similarity_result_t* result); + /** * @ingroup similarity * @brief Get the similarity coefficient array @@ -135,6 +145,120 @@ cugraph_error_code_t cugraph_overlap_coefficients(const cugraph_resource_handle_ cugraph_similarity_result_t** result, cugraph_error_t** error); +/** + * @brief Perform All-Pairs Jaccard similarity computation + * + * Compute the similarity for all vertex pairs derived from the two-hop neighbors + * of an optional specified vertex list. This function will identify the two-hop + * neighbors of the specified vertices (all vertices in the graph if not specified) + * and compute similarity for those vertices. + * + * If the topk parameter is specified then the result will only contain the top k + * highest scoring results. + * + * Note that Jaccard similarity must run on a symmetric graph. + * + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph + * @param [in] vertices Vertex list for input. If null then compute based on + * all vertices in the graph. 
+ * @param [in] use_weight If true consider the edge weight in the graph, if false use an + * edge weight of 1 + * @param [in] topk Specify how many answers to return. Specifying SIZE_MAX + * will return all values. + * @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to + * `true`). + * @param [out] result Opaque pointer to similarity results + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_all_pairs_jaccard_coefficients( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + bool_t use_weight, + size_t topk, + bool_t do_expensive_check, + cugraph_similarity_result_t** result, + cugraph_error_t** error); + +/** + * @brief Perform All Pairs Sorensen similarity computation + * + * Compute the similarity for all vertex pairs derived from the two-hop neighbors + * of an optional specified vertex list. This function will identify the two-hop + * neighbors of the specified vertices (all vertices in the graph if not specified) + * and compute similarity for those vertices. + * + * If the topk parameter is specified then the result will only contain the top k + * highest scoring results. + * + * Note that Sorensen similarity must run on a symmetric graph. + * + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph + * @param [in] vertices Vertex list for input. If null then compute based on + * all vertices in the graph. + * @param [in] use_weight If true consider the edge weight in the graph, if false use an + * edge weight of 1 + * @param [in] topk Specify how many answers to return. Specifying SIZE_MAX + * will return all values. + * @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to + * `true`). 
+ * @param [out] result Opaque pointer to similarity results + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_all_pairs_sorensen_coefficients( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + bool_t use_weight, + size_t topk, + bool_t do_expensive_check, + cugraph_similarity_result_t** result, + cugraph_error_t** error); + +/** + * @brief Perform All Pairs overlap similarity computation + * + * Compute the similarity for all vertex pairs derived from the two-hop neighbors + * of an optional specified vertex list. This function will identify the two-hop + * neighbors of the specified vertices (all vertices in the graph if not specified) + * and compute similarity for those vertices. + * + * If the topk parameter is specified then the result will only contain the top k + * highest scoring results. + * + * Note that overlap similarity must run on a symmetric graph. + * + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph + * @param [in] vertices Vertex list for input. If null then compute based on + * all vertices in the graph. + * @param [in] use_weight If true consider the edge weight in the graph, if false use an + * edge weight of 1 + * @param [in] topk Specify how many answers to return. Specifying SIZE_MAX + * will return all values. + * @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to + * `true`). + * @param [out] result Opaque pointer to similarity results + * @param [out] error Pointer to an error object storing details of any error. 
Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_all_pairs_overlap_coefficients( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + bool_t use_weight, + size_t topk, + bool_t do_expensive_check, + cugraph_similarity_result_t** result, + cugraph_error_t** error); + #ifdef __cplusplus } #endif diff --git a/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh b/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh index ab14ff6c685..18e3a6669ad 100644 --- a/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh +++ b/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2024, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ /* * FIXME: This file is copied from cudf because CuCollections doesnt support concurrent - * insert/find for 8 byte key-value pair size. The plan is to migrate to + * insert/find for 8 byte key-value pair size. The plan is to migrate to * using the cuco when the feature is supported. At that point this file can be deleted. */ #pragma once diff --git a/cpp/src/c_api/ecg.cpp b/cpp/src/c_api/ecg.cpp new file mode 100644 index 00000000000..f91b86a864f --- /dev/null +++ b/cpp/src/c_api/ecg.cpp @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace { + +struct ecg_functor : public cugraph::c_api::abstract_functor { + raft::handle_t const& handle_; + cugraph::c_api::cugraph_rng_state_t* rng_state_{nullptr}; + cugraph::c_api::cugraph_graph_t* graph_{nullptr}; + double min_weight_{0.1}; + size_t ensemble_size_{10}; + size_t max_level_{0}; + double threshold_{0.001}; + double resolution_{1}; + bool do_expensive_check_{false}; + cugraph::c_api::cugraph_hierarchical_clustering_result_t* result_{}; + + ecg_functor(::cugraph_resource_handle_t const* handle, + ::cugraph_rng_state_t* rng_state, + ::cugraph_graph_t* graph, + double min_weight, + size_t ensemble_size, + size_t max_level, + double threshold, + double resolution, + bool do_expensive_check) + : abstract_functor(), + handle_(*reinterpret_cast(handle)->handle_), + rng_state_(reinterpret_cast(rng_state)), + graph_(reinterpret_cast(graph)), + // forward the caller's values; without these the in-class defaults (0.1 / 10) were always used + min_weight_(min_weight), + ensemble_size_(ensemble_size), + max_level_(max_level), + threshold_(threshold), + resolution_(resolution), + do_expensive_check_(do_expensive_check) + { + } + + template + void operator()() + { + if constexpr (!cugraph::is_candidate::value) { + unsupported(); + } else { + // ecg expects store_transposed == false + if constexpr (store_transposed) { + error_code_ = cugraph::c_api:: + transpose_storage( + handle_, graph_, error_.get()); + if (error_code_ != CUGRAPH_SUCCESS) return; + } + + auto graph = + reinterpret_cast*>(graph_->graph_); + + auto graph_view = graph->view(); + + auto
edge_weights = reinterpret_cast< + cugraph::edge_property_t, + weight_t>*>(graph_->edge_weights_); + + auto number_map = reinterpret_cast*>(graph_->number_map_); + + rmm::device_uvector clusters(0, handle_.get_stream()); + + weight_t modularity; + + std::tie(clusters, std::ignore, modularity) = + cugraph::ecg(handle_, + rng_state_->rng_state_, + graph_view, + (edge_weights != nullptr) + ? std::make_optional(edge_weights->view()) + : std::make_optional(cugraph::c_api::create_constant_edge_property( + handle_, graph_view, weight_t{1}) + .view()), + static_cast(min_weight_), + ensemble_size_, + max_level_, + static_cast(threshold_), + static_cast(resolution_)); + + rmm::device_uvector vertices(graph_view.local_vertex_partition_range_size(), + handle_.get_stream()); + raft::copy(vertices.data(), number_map->data(), vertices.size(), handle_.get_stream()); + + result_ = new cugraph::c_api::cugraph_hierarchical_clustering_result_t{ + modularity, + new cugraph::c_api::cugraph_type_erased_device_array_t(vertices, graph_->vertex_type_), + new cugraph::c_api::cugraph_type_erased_device_array_t(clusters, graph_->vertex_type_)}; + } + } +}; + +} // namespace + +extern "C" cugraph_error_code_t cugraph_ecg(const cugraph_resource_handle_t* handle, + cugraph_rng_state_t* rng_state, + cugraph_graph_t* graph, + double min_weight, + size_t ensemble_size, + size_t max_level, + double threshold, + double resolution, + bool_t do_expensive_check, + cugraph_hierarchical_clustering_result_t** result, + cugraph_error_t** error) +{ + ecg_functor functor(handle, + rng_state, + graph, + min_weight, + ensemble_size, + max_level, + threshold, + resolution, + do_expensive_check); + + return cugraph::c_api::run_algorithm(graph, functor, result, error); +} diff --git a/cpp/src/c_api/legacy_ecg.cpp b/cpp/src/c_api/legacy_ecg.cpp index 304d9272910..6fee219f303 100644 --- a/cpp/src/c_api/legacy_ecg.cpp +++ b/cpp/src/c_api/legacy_ecg.cpp @@ -31,7 +31,7 @@ namespace { -struct ecg_functor : public 
cugraph::c_api::abstract_functor { +struct legacy_ecg_functor : public cugraph::c_api::abstract_functor { raft::handle_t const& handle_; cugraph::c_api::cugraph_graph_t* graph_; double min_weight_; @@ -39,11 +39,11 @@ struct ecg_functor : public cugraph::c_api::abstract_functor { bool do_expensive_check_; cugraph::c_api::cugraph_hierarchical_clustering_result_t* result_{}; - ecg_functor(::cugraph_resource_handle_t const* handle, - ::cugraph_graph_t* graph, - double min_weight, - size_t ensemble_size, - bool do_expensive_check) + legacy_ecg_functor(::cugraph_resource_handle_t const* handle, + ::cugraph_graph_t* graph, + double min_weight, + size_t ensemble_size, + bool do_expensive_check) : abstract_functor(), handle_(*reinterpret_cast(handle)->handle_), graph_(reinterpret_cast(graph)), @@ -120,15 +120,16 @@ struct ecg_functor : public cugraph::c_api::abstract_functor { } // namespace -extern "C" cugraph_error_code_t cugraph_ecg(const cugraph_resource_handle_t* handle, - cugraph_graph_t* graph, - double min_weight, - size_t ensemble_size, - bool_t do_expensive_check, - cugraph_hierarchical_clustering_result_t** result, - cugraph_error_t** error) +extern "C" cugraph_error_code_t cugraph_legacy_ecg( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + double min_weight, + size_t ensemble_size, + bool_t do_expensive_check, + cugraph_hierarchical_clustering_result_t** result, + cugraph_error_t** error) { - ecg_functor functor(handle, graph, min_weight, ensemble_size, do_expensive_check); + legacy_ecg_functor functor(handle, graph, min_weight, ensemble_size, do_expensive_check); return cugraph::c_api::run_algorithm(graph, functor, result, error); } diff --git a/cpp/src/c_api/similarity.cpp b/cpp/src/c_api/similarity.cpp index f456c957f8e..aa54fc6dee7 100644 --- a/cpp/src/c_api/similarity.cpp +++ b/cpp/src/c_api/similarity.cpp @@ -34,6 +34,7 @@ namespace c_api { struct cugraph_similarity_result_t { cugraph_type_erased_device_array_t* 
similarity_coefficients_; + cugraph_vertex_pairs_t* vertex_pairs_; }; } // namespace c_api @@ -131,12 +132,92 @@ struct similarity_functor : public cugraph::c_api::abstract_functor { graph_view, use_weight_ ? std::make_optional(edge_weights->view()) : std::nullopt, std::make_tuple(raft::device_span{v1.data(), v1.size()}, - raft::device_span{v2.data(), v2.size()}), - use_weight_); + raft::device_span{v2.data(), v2.size()})); result_ = new cugraph::c_api::cugraph_similarity_result_t{ new cugraph::c_api::cugraph_type_erased_device_array_t(similarity_coefficients, - graph_->weight_type_)}; + graph_->weight_type_), + nullptr}; + } + } +}; + +template +struct all_pairs_similarity_functor : public cugraph::c_api::abstract_functor { + raft::handle_t const& handle_; + cugraph::c_api::cugraph_graph_t* graph_; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* vertices_; + call_similarity_functor_t call_similarity_; + bool use_weight_; + size_t topk_; + bool do_expensive_check_; + + cugraph::c_api::cugraph_similarity_result_t* result_{}; + + all_pairs_similarity_functor(::cugraph_resource_handle_t const* handle, + ::cugraph_graph_t* graph, + ::cugraph_type_erased_device_array_view_t const* vertices, + call_similarity_functor_t call_similarity, + bool use_weight, + size_t topk, + bool do_expensive_check) + : abstract_functor(), + handle_(*reinterpret_cast(handle)->handle_), + graph_(reinterpret_cast(graph)), + vertices_( + reinterpret_cast(vertices)), + call_similarity_(call_similarity), + use_weight_(use_weight), + topk_(topk), + do_expensive_check_(do_expensive_check) + { + } + + template + void operator()() + { + if constexpr (!cugraph::is_candidate::value) { + unsupported(); + } else { + // similarity algorithms expect store_transposed == false + if constexpr (store_transposed) { + error_code_ = cugraph::c_api:: + transpose_storage( + handle_, graph_, error_.get()); + if (error_code_ != CUGRAPH_SUCCESS) return; + } + + auto graph = + 
reinterpret_cast*>(graph_->graph_); + + auto graph_view = graph->view(); + + auto edge_weights = reinterpret_cast< + cugraph::edge_property_t, + weight_t>*>(graph_->edge_weights_); + + auto number_map = reinterpret_cast*>(graph_->number_map_); + + auto [v1, v2, similarity_coefficients] = + call_similarity_(handle_, + graph_view, + use_weight_ ? std::make_optional(edge_weights->view()) : std::nullopt, + vertices_ ? std::make_optional(raft::device_span{ + vertices_->as_type(), vertices_->size_}) + : std::nullopt, + topk_ != SIZE_MAX ? std::make_optional(topk_) : std::nullopt); + + result_ = new cugraph::c_api::cugraph_similarity_result_t{ + new cugraph::c_api::cugraph_type_erased_device_array_t(similarity_coefficients, + graph_->weight_type_), + new cugraph::c_api::cugraph_vertex_pairs_t{ + new cugraph::c_api::cugraph_type_erased_device_array_t(v1, graph_->vertex_type_), + new cugraph::c_api::cugraph_type_erased_device_array_t(v2, graph_->vertex_type_)}}; } } }; @@ -147,11 +228,24 @@ struct jaccard_functor { raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, - std::tuple, raft::device_span> vertex_pairs, - bool use_weights) + std::tuple, raft::device_span> vertex_pairs) { return cugraph::jaccard_coefficients(handle, graph_view, edge_weight_view, vertex_pairs); } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector> + operator()(raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk) + { + return cugraph::jaccard_all_pairs_coefficients( + handle, graph_view, edge_weight_view, vertices, topk); + } }; struct sorensen_functor { @@ -160,11 +254,24 @@ struct sorensen_functor { raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, - std::tuple, raft::device_span> vertex_pairs, - bool use_weights) + std::tuple, raft::device_span> vertex_pairs) { return 
cugraph::sorensen_coefficients(handle, graph_view, edge_weight_view, vertex_pairs); } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector> + operator()(raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk) + { + return cugraph::sorensen_all_pairs_coefficients( + handle, graph_view, edge_weight_view, vertices, topk); + } }; struct overlap_functor { @@ -173,11 +280,24 @@ struct overlap_functor { raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, - std::tuple, raft::device_span> vertex_pairs, - bool use_weights) + std::tuple, raft::device_span> vertex_pairs) { return cugraph::overlap_coefficients(handle, graph_view, edge_weight_view, vertex_pairs); } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector> + operator()(raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk) + { + return cugraph::overlap_all_pairs_coefficients( + handle, graph_view, edge_weight_view, vertices, topk); + } }; } // namespace @@ -185,11 +305,19 @@ struct overlap_functor { extern "C" cugraph_type_erased_device_array_view_t* cugraph_similarity_result_get_similarity( cugraph_similarity_result_t* result) { - auto internal_pointer = reinterpret_cast(result); + auto internal_pointer = + reinterpret_cast(result); return reinterpret_cast( internal_pointer->similarity_coefficients_->view()); } +extern "C" cugraph_vertex_pairs_t* cugraph_similarity_result_get_vertex_pairs( + cugraph_similarity_result_t* result) +{ + auto internal_pointer = reinterpret_cast(result); + return reinterpret_cast(internal_pointer->vertex_pairs_); +} + extern "C" void cugraph_similarity_result_free(cugraph_similarity_result_t* result) { auto internal_pointer = reinterpret_cast(result); @@ -262,3 +390,72 @@ extern "C" 
cugraph_error_code_t cugraph_overlap_coefficients( return cugraph::c_api::run_algorithm(graph, functor, result, error); } + +extern "C" cugraph_error_code_t cugraph_all_pairs_jaccard_coefficients( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + bool_t use_weight, + size_t topk, + bool_t do_expensive_check, + cugraph_similarity_result_t** result, + cugraph_error_t** error) +{ + if (use_weight) { + CAPI_EXPECTS( + reinterpret_cast(graph)->edge_weights_ != nullptr, + CUGRAPH_INVALID_INPUT, + "use_weight is true but edge weights are not provided.", + *error); + } + all_pairs_similarity_functor functor( + handle, graph, vertices, jaccard_functor{}, use_weight, topk, do_expensive_check); + + return cugraph::c_api::run_algorithm(graph, functor, result, error); +} + +extern "C" cugraph_error_code_t cugraph_all_pairs_sorensen_coefficients( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + bool_t use_weight, + size_t topk, + bool_t do_expensive_check, + cugraph_similarity_result_t** result, + cugraph_error_t** error) +{ + if (use_weight) { + CAPI_EXPECTS( + reinterpret_cast(graph)->edge_weights_ != nullptr, + CUGRAPH_INVALID_INPUT, + "use_weight is true but edge weights are not provided.", + *error); + } + all_pairs_similarity_functor functor( + handle, graph, vertices, sorensen_functor{}, use_weight, topk, do_expensive_check); + + return cugraph::c_api::run_algorithm(graph, functor, result, error); +} + +extern "C" cugraph_error_code_t cugraph_all_pairs_overlap_coefficients( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + bool_t use_weight, + size_t topk, + bool_t do_expensive_check, + cugraph_similarity_result_t** result, + cugraph_error_t** error) +{ + if (use_weight) { + CAPI_EXPECTS( + reinterpret_cast(graph)->edge_weights_ != 
nullptr, + CUGRAPH_INVALID_INPUT, + "use_weight is true but edge weights are not provided.", + *error); + } + all_pairs_similarity_functor functor( + handle, graph, vertices, overlap_functor{}, use_weight, topk, do_expensive_check); + + return cugraph::c_api::run_algorithm(graph, functor, result, error); +} diff --git a/cpp/src/link_prediction/jaccard_impl.cuh b/cpp/src/link_prediction/jaccard_impl.cuh index bdc80f4f6ac..d8cfcf19b4f 100644 --- a/cpp/src/link_prediction/jaccard_impl.cuh +++ b/cpp/src/link_prediction/jaccard_impl.cuh @@ -57,4 +57,26 @@ rmm::device_uvector jaccard_coefficients( do_expensive_check); } +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check) +{ + CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); + + return detail::all_pairs_similarity(handle, + graph_view, + edge_weight_view, + vertices, + topk, + detail::jaccard_functor_t{}, + do_expensive_check); +} + } // namespace cugraph diff --git a/cpp/src/link_prediction/jaccard_mg.cu b/cpp/src/link_prediction/jaccard_mg.cu index 8e91bb9f3e1..f53d173c6dd 100644 --- a/cpp/src/link_prediction/jaccard_mg.cu +++ b/cpp/src/link_prediction/jaccard_mg.cu @@ -59,4 +59,64 @@ template rmm::device_uvector jaccard_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + 
std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/link_prediction/jaccard_sg.cu b/cpp/src/link_prediction/jaccard_sg.cu index e25d2d72d3b..6bb89a21368 100644 --- a/cpp/src/link_prediction/jaccard_sg.cu +++ b/cpp/src/link_prediction/jaccard_sg.cu @@ -59,4 +59,64 @@ template rmm::device_uvector jaccard_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + 
graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/link_prediction/overlap_impl.cuh b/cpp/src/link_prediction/overlap_impl.cuh index 95542a83ff5..38e654453ff 100644 --- a/cpp/src/link_prediction/overlap_impl.cuh +++ b/cpp/src/link_prediction/overlap_impl.cuh @@ -57,4 +57,26 @@ rmm::device_uvector overlap_coefficients( do_expensive_check); } +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check) +{ + CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); + + return 
detail::all_pairs_similarity(handle, + graph_view, + edge_weight_view, + vertices, + topk, + detail::overlap_functor_t{}, + do_expensive_check); +} + } // namespace cugraph diff --git a/cpp/src/link_prediction/overlap_mg.cu b/cpp/src/link_prediction/overlap_mg.cu index 54f7912aa14..a88159e682b 100644 --- a/cpp/src/link_prediction/overlap_mg.cu +++ b/cpp/src/link_prediction/overlap_mg.cu @@ -59,4 +59,64 @@ template rmm::device_uvector overlap_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, 
rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/link_prediction/overlap_sg.cu b/cpp/src/link_prediction/overlap_sg.cu index 1b169570e57..17e4bafdcbe 100644 --- a/cpp/src/link_prediction/overlap_sg.cu +++ b/cpp/src/link_prediction/overlap_sg.cu @@ -59,4 +59,64 @@ template rmm::device_uvector overlap_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool 
do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh index 4344262e453..c13259f0da7 100644 --- a/cpp/src/link_prediction/similarity_impl.cuh +++ b/cpp/src/link_prediction/similarity_impl.cuh @@ -17,6 +17,7 @@ #include "prims/count_if_e.cuh" #include "prims/per_v_pair_transform_dst_nbr_intersection.cuh" +#include "prims/per_v_transform_reduce_incoming_outgoing_e.cuh" #include "prims/update_edge_src_dst_property.cuh" #include "utilities/error_check_utils.cuh" @@ -162,5 +163,392 @@ rmm::device_uvector similarity( } } +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector> +all_pairs_similarity(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + functor_t functor, + bool do_expensive_check = false) +{ + using GraphViewType = graph_view_t; + + CUGRAPH_EXPECTS(graph_view.is_symmetric(), + "similarity algorithms require an undirected(symmetric) graph"); + + // FIXME: See https://github.com/rapidsai/cugraph/issues/4132 + // Once that issue is resolved we can drop this check + CUGRAPH_EXPECTS(!graph_view.is_multigraph() || !edge_weight_view, + "Weighted implementation currently fails on multi-graph"); + + if (do_expensive_check) { + if (vertices) { + auto vertex_partition = vertex_partition_device_view_t( + graph_view.local_vertex_partition_view()); + auto num_invalid_vertices = + thrust::count_if(handle.get_thrust_policy(), + vertices->begin(), + vertices->end(), + [vertex_partition] __device__(auto val) { + return !(vertex_partition.is_valid_vertex(val) && + 
vertex_partition.in_local_vertex_partition_range_nocheck(val)); + }); + + if constexpr (multi_gpu) { + num_invalid_vertices = cugraph::host_scalar_allreduce( + handle.get_comms(), num_invalid_vertices, raft::comms::op_t::SUM, handle.get_stream()); + } + + CUGRAPH_EXPECTS(num_invalid_vertices == 0, + "Invalid input arguments: there are invalid input vertices."); + } + + if (edge_weight_view) { + auto num_negative_edge_weights = + count_if_e(handle, + graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + *edge_weight_view, + [] __device__(vertex_t, vertex_t, auto, auto, weight_t w) { return w < 0.0; }); + + if constexpr (multi_gpu) { + num_negative_edge_weights = cugraph::host_scalar_allreduce(handle.get_comms(), + num_negative_edge_weights, + raft::comms::op_t::SUM, + handle.get_stream()); + } + + CUGRAPH_EXPECTS( + num_negative_edge_weights == 0, + "Invalid input argument: input edge weights should have non-negative values."); + } + } + + if (topk) { + rmm::device_uvector tmp_vertices(0, handle.get_stream()); + + if (vertices) { + tmp_vertices.resize(vertices->size(), handle.get_stream()); + thrust::copy( + handle.get_thrust_policy(), vertices->begin(), vertices->end(), tmp_vertices.begin()); + } else { + tmp_vertices.resize(graph_view.local_vertex_partition_range_size(), handle.get_stream()); + thrust::sequence(handle.get_thrust_policy(), + tmp_vertices.begin(), + tmp_vertices.end(), + graph_view.local_vertex_partition_range_first()); + } + + // We can reduce memory footprint by doing work in batches and + // computing/updating topk with each batch + + // FIXME: Experiment with this and adjust as necessary + // size_t const + // MAX_PAIRS_PER_BATCH{static_cast(handle.get_device_properties().multiProcessorCount) * + // (1 << 15)}; + size_t const MAX_PAIRS_PER_BATCH{100}; + + rmm::device_uvector degrees = graph_view.compute_out_degrees(handle); + rmm::device_uvector two_hop_degrees(degrees.size() + 1, handle.get_stream()); + + // 
Let's compute the maximum size of the 2-hop neighborhood of each vertex + // FIXME: If vertices is specified, this could be done on a subset of the vertices + // + edge_dst_property_t edge_dst_degrees(handle, graph_view); + update_edge_dst_property(handle, graph_view, degrees.begin(), edge_dst_degrees); + + per_v_transform_reduce_incoming_e( + handle, + graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_degrees.view(), + edge_dummy_property_t{}.view(), + [] __device__(vertex_t, vertex_t, auto, auto dst_degree, auto) { + return static_cast(dst_degree); + }, + size_t{0}, + reduce_op::plus{}, + two_hop_degrees.begin()); + + if (vertices) { + rmm::device_uvector gathered_two_hop_degrees(tmp_vertices.size() + 1, + handle.get_stream()); + + thrust::gather( + handle.get_thrust_policy(), + thrust::make_transform_iterator( + tmp_vertices.begin(), + cugraph::detail::shift_left_t{graph_view.local_vertex_partition_range_first()}), + thrust::make_transform_iterator( + tmp_vertices.end(), + cugraph::detail::shift_left_t{graph_view.local_vertex_partition_range_first()}), + two_hop_degrees.begin(), + gathered_two_hop_degrees.begin()); + + two_hop_degrees = std::move(gathered_two_hop_degrees); + } + + thrust::sort_by_key(handle.get_thrust_policy(), + two_hop_degrees.begin(), + two_hop_degrees.end() - 1, + tmp_vertices.begin(), + thrust::greater{}); + + thrust::exclusive_scan(handle.get_thrust_policy(), + two_hop_degrees.begin(), + two_hop_degrees.end(), + two_hop_degrees.begin()); + + auto two_hop_degree_offsets = std::move(two_hop_degrees); + + rmm::device_uvector top_v1(0, handle.get_stream()); + rmm::device_uvector top_v2(0, handle.get_stream()); + rmm::device_uvector top_score(0, handle.get_stream()); + + top_v1.reserve(*topk, handle.get_stream()); + top_v2.reserve(*topk, handle.get_stream()); + top_score.reserve(*topk, handle.get_stream()); + + size_t sum_two_hop_degrees{0}; + weight_t similarity_threshold{0}; + std::vector batch_offsets; + + 
raft::update_host(&sum_two_hop_degrees, + two_hop_degree_offsets.data() + two_hop_degree_offsets.size() - 1, + 1, + handle.get_stream()); + + std::tie(batch_offsets, std::ignore) = compute_offset_aligned_element_chunks( + handle, + raft::device_span{two_hop_degree_offsets.data(), two_hop_degree_offsets.size()}, + sum_two_hop_degrees, + MAX_PAIRS_PER_BATCH); + + for (size_t batch_number = 0; batch_number < (batch_offsets.size() - 1); ++batch_number) { + if (batch_offsets[batch_number + 1] > batch_offsets[batch_number]) { + auto [offsets, v2] = + k_hop_nbrs(handle, + graph_view, + raft::device_span{ + tmp_vertices.data() + batch_offsets[batch_number], + batch_offsets[batch_number + 1] - batch_offsets[batch_number]}, + 2, + do_expensive_check); + + auto v1 = cugraph::detail::expand_sparse_offsets( + raft::device_span{offsets.data(), offsets.size()}, + vertex_t{0}, + handle.get_stream()); + + cugraph::unrenumber_local_int_vertices( + handle, + v1.data(), + v1.size(), + tmp_vertices.data() + batch_offsets[batch_number], + vertex_t{0}, + static_cast(batch_offsets[batch_number + 1] - batch_offsets[batch_number]), + do_expensive_check); + + auto new_size = thrust::distance( + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::remove_if( + handle.get_thrust_policy(), + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::make_zip_iterator(v1.end(), v2.end()), + [] __device__(auto tuple) { return thrust::get<0>(tuple) == thrust::get<1>(tuple); })); + + v1.resize(new_size, handle.get_stream()); + v2.resize(new_size, handle.get_stream()); + + auto score = + similarity(handle, + graph_view, + edge_weight_view, + std::make_tuple(raft::device_span{v1.data(), v1.size()}, + raft::device_span{v2.data(), v2.size()}), + functor, + do_expensive_check); + + // Add a remove_if to remove items that are less than the last topk element + new_size = thrust::distance( + thrust::make_zip_iterator(score.begin(), v1.begin(), v2.begin()), + 
thrust::remove_if(handle.get_thrust_policy(), + thrust::make_zip_iterator(score.begin(), v1.begin(), v2.begin()), + thrust::make_zip_iterator(score.end(), v1.end(), v2.end()), + [similarity_threshold] __device__(auto tuple) { + return thrust::get<0>(tuple) < similarity_threshold; + })); + + score.resize(new_size, handle.get_stream()); + v1.resize(new_size, handle.get_stream()); + v2.resize(new_size, handle.get_stream()); + + thrust::sort_by_key(handle.get_thrust_policy(), + score.begin(), + score.end(), + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::greater{}); + + size_t v1_keep = std::min(*topk, v1.size()); + + if (score.size() < (top_v1.size() + v1_keep)) { + score.resize(top_v1.size() + v1_keep, handle.get_stream()); + v1.resize(score.size(), handle.get_stream()); + v2.resize(score.size(), handle.get_stream()); + } + + thrust::copy( + handle.get_thrust_policy(), top_v1.begin(), top_v1.end(), v1.begin() + v1_keep); + thrust::copy( + handle.get_thrust_policy(), top_v2.begin(), top_v2.end(), v2.begin() + v1_keep); + thrust::copy( + handle.get_thrust_policy(), top_score.begin(), top_score.end(), score.begin() + v1_keep); + + thrust::sort_by_key(handle.get_thrust_policy(), + score.begin(), + score.end(), + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::greater{}); + + if (top_v1.size() < std::min(*topk, v1.size())) { + top_v1.resize(std::min(*topk, v1.size()), handle.get_stream()); + top_v2.resize(top_v1.size(), handle.get_stream()); + top_score.resize(top_v1.size(), handle.get_stream()); + } + + thrust::copy( + handle.get_thrust_policy(), v1.begin(), v1.begin() + top_v1.size(), top_v1.begin()); + thrust::copy( + handle.get_thrust_policy(), v2.begin(), v2.begin() + top_v1.size(), top_v2.begin()); + thrust::copy(handle.get_thrust_policy(), + score.begin(), + score.begin() + top_v1.size(), + top_score.begin()); + + if constexpr (multi_gpu) { + bool is_root = handle.get_comms().get_rank() == int{0}; + auto rx_sizes = 
cugraph::host_scalar_gather( + handle.get_comms(), top_v1.size(), int{0}, handle.get_stream()); + std::vector rx_displs; + size_t gathered_size{0}; + + if (is_root) { + rx_displs.resize(handle.get_comms().get_size()); + rx_displs[0] = 0; + std::partial_sum(rx_sizes.begin(), rx_sizes.end() - 1, rx_displs.begin() + 1); + gathered_size = std::reduce(rx_sizes.begin(), rx_sizes.end()); + } + + rmm::device_uvector gathered_v1(gathered_size, handle.get_stream()); + rmm::device_uvector gathered_v2(gathered_size, handle.get_stream()); + rmm::device_uvector gathered_score(gathered_size, handle.get_stream()); + + cugraph::device_gatherv( + handle.get_comms(), + thrust::make_zip_iterator(top_v1.begin(), top_v2.begin(), top_score.begin()), + thrust::make_zip_iterator( + gathered_v1.begin(), gathered_v2.begin(), gathered_score.begin()), + + top_v1.size(), + rx_sizes, + rx_displs, + int{0}, + handle.get_stream()); + + if (is_root) { + thrust::sort_by_key(handle.get_thrust_policy(), + gathered_score.begin(), + gathered_score.end(), + thrust::make_zip_iterator(gathered_v1.begin(), gathered_v2.begin()), + thrust::greater{}); + + if (gathered_v1.size() > *topk) { + gathered_v1.resize(*topk, handle.get_stream()); + gathered_v2.resize(*topk, handle.get_stream()); + gathered_score.resize(*topk, handle.get_stream()); + } + + top_v1 = std::move(gathered_v1); + top_v2 = std::move(gathered_v2); + top_score = std::move(gathered_score); + } else { + top_v1.resize(0, handle.get_stream()); + top_v2.resize(0, handle.get_stream()); + top_score.resize(0, handle.get_stream()); + } + } + + if (top_score.size() == *topk) { + raft::update_host( + &similarity_threshold, top_score.data() + *topk - 1, 1, handle.get_stream()); + + if constexpr (multi_gpu) { + similarity_threshold = host_scalar_bcast( + handle.get_comms(), similarity_threshold, int{0}, handle.get_stream()); + } + } + } + } + + return std::make_tuple(std::move(top_v1), std::move(top_v2), std::move(top_score)); + } else { + 
rmm::device_uvector tmp_vertices(0, handle.get_stream()); + raft::device_span vertices_span{nullptr, size_t{0}}; + + if (vertices) { + vertices_span = raft::device_span{vertices->data(), vertices->size()}; + } else { + tmp_vertices.resize(graph_view.local_vertex_partition_range_size(), handle.get_stream()); + thrust::sequence(handle.get_thrust_policy(), + tmp_vertices.begin(), + tmp_vertices.end(), + graph_view.local_vertex_partition_range_first()); + vertices_span = raft::device_span{tmp_vertices.data(), tmp_vertices.size()}; + } + + auto [offsets, v2] = k_hop_nbrs(handle, graph_view, vertices_span, 2, do_expensive_check); + + auto v1 = cugraph::detail::expand_sparse_offsets( + raft::device_span{offsets.data(), offsets.size()}, + vertex_t{0}, + handle.get_stream()); + + cugraph::unrenumber_local_int_vertices(handle, + v1.data(), + v1.size(), + vertices_span.data(), + vertex_t{0}, + static_cast(vertices_span.size()), + do_expensive_check); + + auto new_size = thrust::distance( + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::remove_if( + handle.get_thrust_policy(), + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::make_zip_iterator(v1.end(), v2.end()), + [] __device__(auto tuple) { return thrust::get<0>(tuple) == thrust::get<1>(tuple); })); + + v1.resize(new_size, handle.get_stream()); + v2.resize(new_size, handle.get_stream()); + + auto score = + similarity(handle, + graph_view, + edge_weight_view, + std::make_tuple(raft::device_span{v1.data(), v1.size()}, + raft::device_span{v2.data(), v2.size()}), + functor, + do_expensive_check); + + return std::make_tuple(std::move(v1), std::move(v2), std::move(score)); + } +} + } // namespace detail } // namespace cugraph diff --git a/cpp/src/link_prediction/sorensen_impl.cuh b/cpp/src/link_prediction/sorensen_impl.cuh index 994d824b849..af99732a45e 100644 --- a/cpp/src/link_prediction/sorensen_impl.cuh +++ b/cpp/src/link_prediction/sorensen_impl.cuh @@ -57,4 +57,26 @@ rmm::device_uvector 
sorensen_coefficients( do_expensive_check); } +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check) +{ + CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); + + return detail::all_pairs_similarity(handle, + graph_view, + edge_weight_view, + vertices, + topk, + detail::sorensen_functor_t{}, + do_expensive_check); +} + } // namespace cugraph diff --git a/cpp/src/link_prediction/sorensen_mg.cu b/cpp/src/link_prediction/sorensen_mg.cu index 0a67a871b87..c3d010f8503 100644 --- a/cpp/src/link_prediction/sorensen_mg.cu +++ b/cpp/src/link_prediction/sorensen_mg.cu @@ -59,4 +59,64 @@ template rmm::device_uvector sorensen_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + 
std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/link_prediction/sorensen_sg.cu b/cpp/src/link_prediction/sorensen_sg.cu index 2edfb92bb0f..c129cd40ca4 100644 --- a/cpp/src/link_prediction/sorensen_sg.cu +++ b/cpp/src/link_prediction/sorensen_sg.cu @@ -59,4 +59,64 @@ template rmm::device_uvector sorensen_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& 
handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh index 8e627392555..3b25ae50773 100644 --- a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh @@ -355,13 +355,15 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( // to limit memory footprint ((1 << 20) is a tuning parameter) auto approx_edges_to_sort_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20); - auto [h_vertex_offsets, h_edge_offsets] = detail::compute_offset_aligned_edge_chunks( + auto [h_vertex_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( handle, - edge_partition.offsets(), - edge_partition.dcs_nzd_vertices() - ? (*segment_offsets)[detail::num_sparse_segments_per_vertex_partition] + - *(edge_partition.dcs_nzd_vertex_count()) - : edge_partition.major_range_size(), + raft::device_span{ + edge_partition.offsets(), + 1 + static_cast( + edge_partition.dcs_nzd_vertices() + ? 
(*segment_offsets)[detail::num_sparse_segments_per_vertex_partition] + + *(edge_partition.dcs_nzd_vertex_count()) + : edge_partition.major_range_size())}, edge_partition.number_of_edges(), approx_edges_to_sort_per_iteration); auto num_chunks = h_vertex_offsets.size() - 1; diff --git a/cpp/src/prims/transform_e.cuh b/cpp/src/prims/transform_e.cuh index a34a5a04b49..2cb1a5358b0 100644 --- a/cpp/src/prims/transform_e.cuh +++ b/cpp/src/prims/transform_e.cuh @@ -85,7 +85,7 @@ __global__ void transform_e_packed_bool( if (local_edge_idx < num_edges) { bool compute_predicate = true; if constexpr (check_edge_mask) { - compute_predicate = (edge_mask & packed_bool_mask(lane_id) != packed_bool_empty_mask()); + compute_predicate = ((edge_mask & packed_bool_mask(lane_id)) != packed_bool_empty_mask()); } if (compute_predicate) { @@ -111,10 +111,10 @@ __global__ void transform_e_packed_bool( uint32_t new_val = __ballot_sync(raft::warp_full_mask(), predicate); if (lane_id == 0) { if constexpr (check_edge_mask) { - *(edge_partition_e_value_output.value_first() + idx) = new_val; - } else { auto old_val = *(edge_partition_e_value_output.value_first() + idx); *(edge_partition_e_value_output.value_first() + idx) = (old_val & ~edge_mask) | new_val; + } else { + *(edge_partition_e_value_output.value_first() + idx) = new_val; } } @@ -196,6 +196,9 @@ struct update_e_value_t { __device__ void operator()(typename GraphViewType::edge_type i) const { + if constexpr (check_edge_mask) { + if (!edge_partition_e_mask.get(i)) { return; } + } auto major_idx = edge_partition.major_idx_from_local_edge_idx_nocheck(i); auto major = edge_partition.major_from_major_idx_nocheck(major_idx); auto major_offset = edge_partition.major_offset_from_major_nocheck(major); diff --git a/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh b/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh index 0f128eb8410..f5bc3ef6d2e 100644 --- a/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh +++ 
b/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh @@ -107,12 +107,8 @@ compute_min_hop_for_unique_label_vertex_pairs( rmm::device_uvector d_tmp_storage(0, handle.get_stream()); - auto [h_label_offsets, h_edge_offsets] = - detail::compute_offset_aligned_edge_chunks(handle, - (*label_offsets).data(), - num_labels, - vertices.size(), - approx_edges_to_sort_per_iteration); + auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( + handle, *label_offsets, vertices.size(), approx_edges_to_sort_per_iteration); auto num_chunks = h_label_offsets.size() - 1; for (size_t i = 0; i < num_chunks; ++i) { @@ -599,10 +595,10 @@ renumber_sampled_edgelist( static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20) /* tuning parameter */; // for segmented sort - auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_edge_chunks( + auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( handle, - (*renumber_map_label_offsets).data(), - static_cast((*renumber_map_label_offsets).size() - 1), + raft::device_span{(*renumber_map_label_offsets).data(), + (*renumber_map_label_offsets).size()}, renumber_map.size(), approx_edges_to_sort_per_iteration); auto num_chunks = h_label_offsets.size() - 1; diff --git a/cpp/src/sampling/sampling_post_processing_impl.cuh b/cpp/src/sampling/sampling_post_processing_impl.cuh index f506e4bd04c..299aae13718 100644 --- a/cpp/src/sampling/sampling_post_processing_impl.cuh +++ b/cpp/src/sampling/sampling_post_processing_impl.cuh @@ -286,12 +286,8 @@ compute_min_hop_for_unique_label_vertex_pairs( rmm::device_uvector d_tmp_storage(0, handle.get_stream()); - auto [h_label_offsets, h_edge_offsets] = - detail::compute_offset_aligned_edge_chunks(handle, - (*label_offsets).data(), - num_labels, - vertices.size(), - approx_edges_to_sort_per_iteration); + auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( + handle, *label_offsets, 
vertices.size(), approx_edges_to_sort_per_iteration); auto num_chunks = h_label_offsets.size() - 1; for (size_t i = 0; i < num_chunks; ++i) { @@ -741,10 +737,10 @@ renumber_sampled_edgelist( static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20) /* tuning parameter */; // for segmented sort - auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_edge_chunks( + auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( handle, - (*renumber_map_label_offsets).data(), - static_cast((*renumber_map_label_offsets).size() - 1), + raft::device_span{(*renumber_map_label_offsets).data(), + (*renumber_map_label_offsets).size()}, renumber_map.size(), approx_edges_to_sort_per_iteration); auto num_chunks = h_label_offsets.size() - 1; @@ -910,11 +906,10 @@ sort_sampled_edge_tuples( (1 << 20) /* tuning parameter */; // for sorts in chunks std::tie(h_label_offsets, h_edge_offsets) = - detail::compute_offset_aligned_edge_chunks(handle, - std::get<0>(*edgelist_label_offsets).data(), - std::get<1>(*edgelist_label_offsets), - edgelist_majors.size(), - approx_edges_to_sort_per_iteration); + detail::compute_offset_aligned_element_chunks(handle, + std::get<0>(*edgelist_label_offsets), + edgelist_majors.size(), + approx_edges_to_sort_per_iteration); } else { h_label_offsets = {0, 1}; h_edge_offsets = {0, edgelist_majors.size()}; diff --git a/cpp/src/structure/detail/structure_utils.cuh b/cpp/src/structure/detail/structure_utils.cuh index a96467ce06b..1ef975c1dec 100644 --- a/cpp/src/structure/detail/structure_utils.cuh +++ b/cpp/src/structure/detail/structure_utils.cuh @@ -316,12 +316,8 @@ void sort_adjacency_list(raft::handle_t const& handle, // to limit memory footprint ((1 << 20) is a tuning parameter) auto approx_edges_to_sort_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20); - auto [h_vertex_offsets, h_edge_offsets] = - detail::compute_offset_aligned_edge_chunks(handle, - 
offsets.data(), - static_cast(offsets.size() - 1), - num_edges, - approx_edges_to_sort_per_iteration); + auto [h_vertex_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( + handle, offsets, num_edges, approx_edges_to_sort_per_iteration); auto num_chunks = h_vertex_offsets.size() - 1; // 3. Segmented sort each vertex's neighbors @@ -451,12 +447,8 @@ void sort_adjacency_list(raft::handle_t const& handle, // to limit memory footprint ((1 << 20) is a tuning parameter) auto approx_edges_to_sort_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20); - auto [h_vertex_offsets, h_edge_offsets] = - detail::compute_offset_aligned_edge_chunks(handle, - offsets.data(), - static_cast(offsets.size() - 1), - num_edges, - approx_edges_to_sort_per_iteration); + auto [h_vertex_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( + handle, offsets, num_edges, approx_edges_to_sort_per_iteration); auto num_chunks = h_vertex_offsets.size() - 1; // 3. 
Segmented sort each vertex's neighbors diff --git a/cpp/src/structure/graph_view_impl.cuh b/cpp/src/structure/graph_view_impl.cuh index 623e795d5be..4ee5ad5ca02 100644 --- a/cpp/src/structure/graph_view_impl.cuh +++ b/cpp/src/structure/graph_view_impl.cuh @@ -602,7 +602,6 @@ graph_view_tpartition_, this->edge_partition_segment_offsets_); } else { - CUGRAPH_EXPECTS(!(this->has_edge_mask()), "unimplemented."); return compute_minor_degrees(handle, *this); } } @@ -622,7 +621,6 @@ graph_view_tlocal_vertex_partition_range_size()); } else { - CUGRAPH_EXPECTS(!(this->has_edge_mask()), "unimplemented."); return compute_minor_degrees(handle, *this); } } @@ -633,7 +631,6 @@ graph_view_thas_edge_mask()), "unimplemented."); return compute_minor_degrees(handle, *this); } else { std::optional>> edge_partition_masks{ @@ -663,7 +660,6 @@ graph_view_thas_edge_mask()), "unimplemented."); return compute_minor_degrees(handle, *this); } else { return compute_major_degrees(handle, @@ -681,8 +677,6 @@ template >:: compute_max_in_degree(raft::handle_t const& handle) const { - CUGRAPH_EXPECTS(!(this->has_edge_mask()), "unimplemented."); - auto in_degrees = compute_in_degrees(handle); auto it = thrust::max_element(handle.get_thrust_policy(), in_degrees.begin(), in_degrees.end()); rmm::device_scalar ret(edge_t{0}, handle.get_stream()); @@ -699,8 +693,6 @@ template >:: compute_max_in_degree(raft::handle_t const& handle) const { - CUGRAPH_EXPECTS(!(this->has_edge_mask()), "unimplemented."); - auto in_degrees = compute_in_degrees(handle); auto it = thrust::max_element(handle.get_thrust_policy(), in_degrees.begin(), in_degrees.end()); edge_t ret{0}; @@ -713,8 +705,6 @@ template >:: compute_max_out_degree(raft::handle_t const& handle) const { - CUGRAPH_EXPECTS(!(this->has_edge_mask()), "unimplemented."); - auto out_degrees = compute_out_degrees(handle); auto it = thrust::max_element(handle.get_thrust_policy(), out_degrees.begin(), out_degrees.end()); rmm::device_scalar ret(edge_t{0}, 
handle.get_stream()); @@ -731,8 +721,6 @@ template >:: compute_max_out_degree(raft::handle_t const& handle) const { - CUGRAPH_EXPECTS(!(this->has_edge_mask()), "unimplemented."); - auto out_degrees = compute_out_degrees(handle); auto it = thrust::max_element(handle.get_thrust_policy(), out_degrees.begin(), out_degrees.end()); edge_t ret{0}; @@ -745,8 +733,6 @@ template >:: count_self_loops(raft::handle_t const& handle) const { - CUGRAPH_EXPECTS(!(this->has_edge_mask()), "unimplemented."); - return count_if_e( handle, *this, @@ -760,8 +746,6 @@ template >:: count_self_loops(raft::handle_t const& handle) const { - CUGRAPH_EXPECTS(!(this->has_edge_mask()), "unimplemented."); - return count_if_e( handle, *this, @@ -775,8 +759,6 @@ template >:: count_multi_edges(raft::handle_t const& handle) const { - CUGRAPH_EXPECTS(!(this->has_edge_mask()), "unimplemented."); - if (!this->is_multigraph()) { return edge_t{0}; } edge_t count{0}; @@ -795,8 +777,6 @@ template >:: count_multi_edges(raft::handle_t const& handle) const { - CUGRAPH_EXPECTS(!(this->has_edge_mask()), "unimplemented."); - if (!this->is_multigraph()) { return edge_t{0}; } return count_edge_partition_multi_edges( diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 1e5d0489b1f..46a895536ef 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -248,8 +248,8 @@ ConfigureTest(LOUVAIN_TEST community/louvain_test.cpp) ConfigureTest(LEIDEN_TEST community/leiden_test.cpp) ################################################################################################### -# - ECG tests ------------------------------------------------------------------------------------- -ConfigureTest(ECG_TEST community/ecg_test.cpp) +# - Legacy ECG tests ------------------------------------------------------------------------------------- +ConfigureTest(LEGACY_ECG_TEST community/legacy_ecg_test.cpp) ################################################################################################### # - 
Balanced cut clustering tests ----------------------------------------------------------------- @@ -374,7 +374,7 @@ ConfigureTest(WEAKLY_CONNECTED_COMPONENTS_TEST components/weakly_connected_compo ################################################################################################### # - SIMILARITY tests ------------------------------------------------------------------------------ -ConfigureTest(SIMILARITY_TEST link_prediction/similarity_test.cpp) +ConfigureTest(SIMILARITY_TEST link_prediction/similarity_test.cu) ################################################################################################### # - WEIGHTED_SIMILARITY tests ------------------------------------------------------------------------------ @@ -679,6 +679,7 @@ if(BUILD_CUGRAPH_MG_TESTS) ConfigureCTestMG(MG_CAPI_TRIANGLE_COUNT_TEST c_api/mg_triangle_count_test.c) ConfigureCTestMG(MG_CAPI_LOUVAIN_TEST c_api/mg_louvain_test.c) ConfigureCTestMG(MG_CAPI_LEIDEN_TEST c_api/mg_leiden_test.c) + ConfigureCTestMG(MG_CAPI_ECG_TEST c_api/mg_ecg_test.c) ConfigureCTestMG(MG_CAPI_CORE_NUMBER_TEST c_api/mg_core_number_test.c) ConfigureCTestMG(MG_CAPI_SIMILARITY_TEST c_api/mg_similarity_test.c) ConfigureCTestMG(MG_CAPI_K_CORE_TEST c_api/mg_k_core_test.c) @@ -737,6 +738,7 @@ ConfigureCTest(CAPI_RANDOM_WALKS_TEST c_api/sg_random_walks_test.c) ConfigureCTest(CAPI_TRIANGLE_COUNT_TEST c_api/triangle_count_test.c) ConfigureCTest(CAPI_LOUVAIN_TEST c_api/louvain_test.c) ConfigureCTest(CAPI_LEIDEN_TEST c_api/leiden_test.c) +ConfigureCTest(CAPI_ECG_TEST c_api/ecg_test.c) ############################################################################# # Skipping due to CUDA 12.2 failure that traces back to RAFT # # TODO: Uncomment this once the issue is fixed. # diff --git a/cpp/tests/c_api/ecg_test.c b/cpp/tests/c_api/ecg_test.c new file mode 100644 index 00000000000..4d4dd64572f --- /dev/null +++ b/cpp/tests/c_api/ecg_test.c @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "c_test_utils.h" /* RUN_TEST */ + +#include +#include + +#include + +typedef int32_t vertex_t; +typedef int32_t edge_t; +typedef float weight_t; + +int generic_ecg_test(vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + size_t num_vertices, + size_t num_edges, + double min_weight, + size_t ensemble_size, + size_t max_level, + double threshold, + double resolution, + bool_t store_transposed) +{ + int test_ret_value = 0; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_resource_handle_t* handle = NULL; + cugraph_graph_t* graph = NULL; + cugraph_hierarchical_clustering_result_t* result = NULL; + + data_type_id_t vertex_tid = INT32; + data_type_id_t edge_tid = INT32; + data_type_id_t weight_tid = FLOAT32; + data_type_id_t edge_id_tid = INT32; + data_type_id_t edge_type_tid = INT32; + + handle = cugraph_create_resource_handle(NULL); + TEST_ASSERT(test_ret_value, handle != NULL, "resource handle creation failed."); + + ret_code = create_sg_test_graph(handle, + vertex_tid, + edge_tid, + h_src, + h_dst, + weight_tid, + h_wgt, + edge_type_tid, + NULL, + edge_id_tid, + NULL, + num_edges, + store_transposed, + FALSE, + FALSE, + FALSE, + &graph, + &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + 
cugraph_rng_state_t* rng_state; + ret_code = cugraph_rng_state_create(handle, 0, &rng_state, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed."); + + ret_code = cugraph_ecg(handle, + rng_state, + graph, + min_weight, + ensemble_size, + max_level, + threshold, + resolution, + FALSE, + &result, + &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, "cugraph_ecg failed."); + + if (test_ret_value == 0) { + cugraph_type_erased_device_array_view_t* vertices; + cugraph_type_erased_device_array_view_t* clusters; + + vertices = cugraph_hierarchical_clustering_result_get_vertices(result); + clusters = cugraph_hierarchical_clustering_result_get_clusters(result); + double modularity = cugraph_hierarchical_clustering_result_get_modularity(result); + + vertex_t h_vertices[num_vertices]; + edge_t h_clusters[num_vertices]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_vertices, vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_clusters, clusters, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + // Louvain and permute_range are both tested, here we only make + // sure that function calls succeed as expected. 
+ + cugraph_hierarchical_clustering_result_free(result); + } + + cugraph_sg_graph_free(graph); + cugraph_free_resource_handle(handle); + cugraph_error_free(ret_error); + + return test_ret_value; +} + +int test_ecg() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t max_level = 10; + weight_t threshold = 1e-7; + weight_t resolution = 1.0; + weight_t min_weight = 0.001; + size_t ensemble_size = 10; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = { + 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + + // Louvain wants store_transposed = FALSE + return generic_ecg_test(h_src, + h_dst, + h_wgt, + num_vertices, + num_edges, + min_weight, + ensemble_size, + max_level, + threshold, + resolution, + FALSE); +} + +int test_ecg_no_weight() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t max_level = 10; + weight_t threshold = 1e-7; + weight_t resolution = 1.0; + weight_t min_weight = 0.001; + size_t ensemble_size = 10; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + + // Louvain wants store_transposed = FALSE + return generic_ecg_test(h_src, + h_dst, + NULL, + num_vertices, + num_edges, + min_weight, + ensemble_size, + max_level, + threshold, + resolution, + FALSE); +} + +/******************************************************************************/ + +int main(int argc, char** argv) +{ + int result = 0; + result |= RUN_TEST(test_ecg); + result |= RUN_TEST(test_ecg_no_weight); + return result; +} diff --git a/cpp/tests/c_api/generate_rmat_test.c b/cpp/tests/c_api/generate_rmat_test.c index 442031ff054..f1963fc1aa1 100644 --- a/cpp/tests/c_api/generate_rmat_test.c +++ b/cpp/tests/c_api/generate_rmat_test.c @@ -32,7 +32,7 @@ int test_rmat_generation() typedef float weight_t; vertex_t 
expected_src[] = { 17, 18, 0, 16, 1, 24, 16, 1, 6, 4, 2, 1, 14, 2, 16, 2, 5, 23, 4, 10, 4, 3, 0, 4, 11, 0, 0, 2, 24, 0}; - vertex_t expected_dst[] = { 0, 10, 23, 0, 26, 0, 2, 1, 27, 8, 1, 0, 21, 21, 0, 4, 8, 14, 10, 17, 0, 16, 0, 16, 25, 5, 8, 8, 4, 19}; + vertex_t expected_dst[] = { 0, 10, 23, 0, 26, 0, 2, 1, 27, 8, 1, 0, 21, 21, 0, 4, 8, 14, 10, 17, 0, 16, 0, 16, 25, 5, 8, 8, 4, 19}; cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; cugraph_error_t* ret_error; @@ -47,7 +47,7 @@ int test_rmat_generation() ret_code = cugraph_rng_state_create(handle, 0, &rng_state, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed."); TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); - + ret_code = cugraph_generate_rmat_edgelist(handle, rng_state, 5, @@ -130,7 +130,7 @@ int test_rmat_list_generation() size_t num_lists = 3; vertex_t max_vertex_id[] = { 32, 16, 32 }; size_t expected_len[] = { 20, 16, 20 }; - + ret_code = cugraph_rng_state_create(handle, 0, &rng_state, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed."); TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); diff --git a/cpp/tests/c_api/legacy_ecg_test.c b/cpp/tests/c_api/legacy_ecg_test.c index 5ea1ce79796..b702426b0aa 100644 --- a/cpp/tests/c_api/legacy_ecg_test.c +++ b/cpp/tests/c_api/legacy_ecg_test.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -53,8 +53,8 @@ int generic_ecg_test(vertex_t* h_src, TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); - ret_code = - cugraph_ecg(p_handle, p_graph, minimum_weight, ensemble_size, FALSE, &p_result, &ret_error); + ret_code = cugraph_legacy_ecg( + p_handle, p_graph, minimum_weight, ensemble_size, FALSE, &p_result, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, "cugraph_ecg failed."); @@ -63,8 +63,8 @@ int generic_ecg_test(vertex_t* h_src, cugraph_type_erased_device_array_view_t* vertices; cugraph_type_erased_device_array_view_t* clusters; - vertices = cugraph_hierarchical_clustering_result_get_vertices(p_result); - clusters = cugraph_hierarchical_clustering_result_get_clusters(p_result); + vertices = cugraph_hierarchical_clustering_result_get_vertices(p_result); + clusters = cugraph_hierarchical_clustering_result_get_clusters(p_result); vertex_t h_vertices[num_vertices]; edge_t h_clusters[num_vertices]; @@ -103,18 +103,11 @@ int test_ecg() vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; weight_t h_wgt[] = { 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t h_result[] = {0, 1, 0, 1, 1, 1}; + vertex_t h_result[] = {0, 1, 0, 1, 1, 1}; // Louvain wants store_transposed = FALSE - return generic_ecg_test(h_src, - h_dst, - h_wgt, - h_result, - num_vertices, - num_edges, - min_weight, - ensemble_size, - FALSE); + return generic_ecg_test( + h_src, h_dst, h_wgt, h_result, num_vertices, num_edges, min_weight, ensemble_size, FALSE); } /******************************************************************************/ diff --git a/cpp/tests/c_api/mg_create_graph_test.c b/cpp/tests/c_api/mg_create_graph_test.c index 7156647b025..12ae730216a 100644 --- 
a/cpp/tests/c_api/mg_create_graph_test.c +++ b/cpp/tests/c_api/mg_create_graph_test.c @@ -187,7 +187,7 @@ int test_create_mg_graph_multiple_edge_lists(const cugraph_resource_handle_t* ha size_t vertex_count = local_num_vertices / num_local_arrays; size_t vertex_start = i * vertex_count; vertex_count = (i != (num_local_arrays - 1)) ? vertex_count : (local_num_vertices - vertex_start); - + ret_code = cugraph_type_erased_device_array_create(handle, vertex_count, vertex_tid, vertices + i, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "vertices create failed."); @@ -375,7 +375,7 @@ int test_create_mg_graph_multiple_edge_lists_multi_edge(const cugraph_resource_h size_t vertex_count = (local_num_vertices + num_local_arrays - 1) / num_local_arrays; size_t vertex_start = i * vertex_count; vertex_count = (i != (num_local_arrays - 1)) ? vertex_count : (local_num_vertices - vertex_start); - + ret_code = cugraph_type_erased_device_array_create(handle, vertex_count, vertex_tid, vertices + i, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "vertices create failed."); diff --git a/cpp/tests/c_api/mg_ecg_test.c b/cpp/tests/c_api/mg_ecg_test.c new file mode 100644 index 00000000000..b14ebda2959 --- /dev/null +++ b/cpp/tests/c_api/mg_ecg_test.c @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mg_test_utils.h" /* RUN_TEST */ + +#include +#include + +#include + +typedef int32_t vertex_t; +typedef int32_t edge_t; +typedef float weight_t; + +int generic_ecg_test(const cugraph_resource_handle_t* handle, + vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + size_t num_vertices, + size_t num_edges, + double min_weight, + size_t ensemble_size, + size_t max_level, + double threshold, + double resolution, + bool_t store_transposed) +{ + int test_ret_value = 0; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_graph_t* graph = NULL; + cugraph_hierarchical_clustering_result_t* result = NULL; + + int rank = cugraph_resource_handle_get_rank(handle); + + ret_code = create_mg_test_graph( + handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, &graph, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + cugraph_rng_state_t* rng_state; + ret_code = cugraph_rng_state_create(handle, rank, &rng_state, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed."); + + ret_code = cugraph_ecg(handle, + rng_state, + graph, + min_weight, + ensemble_size, + max_level, + threshold, + resolution, + FALSE, + &result, + &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, "cugraph_ecg failed."); + + if (test_ret_value == 0) { + cugraph_type_erased_device_array_view_t* vertices; + cugraph_type_erased_device_array_view_t* clusters; + + vertices = cugraph_hierarchical_clustering_result_get_vertices(result); + clusters = cugraph_hierarchical_clustering_result_get_clusters(result); + double modularity = cugraph_hierarchical_clustering_result_get_modularity(result); + + vertex_t h_vertices[num_vertices]; + edge_t 
h_clusters[num_vertices]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_vertices, vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_clusters, clusters, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + size_t num_local_vertices = cugraph_type_erased_device_array_view_size(vertices); + + // Louvain and permute_range are both tested, here we only make + // sure that function calls succeed as expected. + + cugraph_hierarchical_clustering_result_free(result); + } + + cugraph_mg_graph_free(graph); + cugraph_error_free(ret_error); + + return test_ret_value; +} + +int test_ecg(const cugraph_resource_handle_t* handle) +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t max_level = 10; + weight_t threshold = 1e-7; + weight_t resolution = 1.0; + weight_t min_weight = 0.001; + size_t ensemble_size = 10; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = { + 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + + // Louvain wants store_transposed = FALSE + return generic_ecg_test(handle, + h_src, + h_dst, + h_wgt, + num_vertices, + num_edges, + min_weight, + ensemble_size, + max_level, + threshold, + resolution, + FALSE); +} + +int test_ecg_no_weight(const cugraph_resource_handle_t* handle) +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t max_level = 10; + weight_t threshold = 1e-7; + weight_t resolution = 1.0; + weight_t min_weight = 0.001; + size_t ensemble_size = 10; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + + // Louvain wants store_transposed = FALSE + 
return generic_ecg_test(handle, + h_src, + h_dst, + NULL, + num_vertices, + num_edges, + min_weight, + ensemble_size, + max_level, + threshold, + resolution, + FALSE); +} + +/******************************************************************************/ + +int main(int argc, char** argv) +{ + void* raft_handle = create_mg_raft_handle(argc, argv); + cugraph_resource_handle_t* handle = cugraph_create_resource_handle(raft_handle); + + int result = 0; + result |= RUN_MG_TEST(test_ecg, handle); + + cugraph_free_resource_handle(handle); + free_mg_raft_handle(raft_handle); + + return result; +} diff --git a/cpp/tests/c_api/mg_two_hop_neighbors_test.c b/cpp/tests/c_api/mg_two_hop_neighbors_test.c index 37ae191c6b2..056da2bcc45 100644 --- a/cpp/tests/c_api/mg_two_hop_neighbors_test.c +++ b/cpp/tests/c_api/mg_two_hop_neighbors_test.c @@ -76,8 +76,8 @@ int generic_two_hop_nbr_test(const cugraph_resource_handle_t* resource_handle, ret_code = cugraph_two_hop_neighbors( resource_handle, graph, start_vertices_view, FALSE, &result, &ret_error); - cugraph_type_erased_device_array_view_t* v1; - cugraph_type_erased_device_array_view_t* v2; + cugraph_type_erased_device_array_view_t const* v1; + cugraph_type_erased_device_array_view_t const* v2; v1 = cugraph_vertex_pairs_get_first(result); v2 = cugraph_vertex_pairs_get_second(result); diff --git a/cpp/tests/c_api/mg_uniform_neighbor_sample_test.c b/cpp/tests/c_api/mg_uniform_neighbor_sample_test.c index 86a0a92eb01..f84ae9975fd 100644 --- a/cpp/tests/c_api/mg_uniform_neighbor_sample_test.c +++ b/cpp/tests/c_api/mg_uniform_neighbor_sample_test.c @@ -229,7 +229,7 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle for (size_t i = 1 ; i < sz ; ++i) { tmp_result_offsets[i-1] = tmp_result_offsets[i] - tmp_result_offsets[i-1]; } - + cugraph_test_host_gatherv_fill(handle, tmp_result_offsets, sz-1, SIZE_T, h_result_offsets + 1); h_result_offsets[0] = 0; @@ -255,7 +255,7 @@ int 
generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle M_w[h_src[i]][h_dst[i]] = h_wgt[i]; else M_w[h_src[i]][h_dst[i]] = 1.0; - + if (h_edge_ids != NULL) M_edge_id[h_src[i]][h_dst[i]] = h_edge_ids[i]; if (h_edge_types != NULL) M_edge_type[h_src[i]][h_dst[i]] = h_edge_types[i]; } @@ -900,7 +900,7 @@ int test_uniform_neighbor_sample_alex_bug(const cugraph_resource_handle_t* handl TEST_ASSERT(test_ret_value, result_offsets_size == expected_size[rank], "incorrect number of results"); - + cugraph_sample_result_free(result); #endif diff --git a/cpp/tests/c_api/similarity_test.c b/cpp/tests/c_api/similarity_test.c index 52f849ccd28..ac7c3b98917 100644 --- a/cpp/tests/c_api/similarity_test.c +++ b/cpp/tests/c_api/similarity_test.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -131,6 +131,112 @@ int generic_similarity_test(vertex_t* h_src, return test_ret_value; } +int generic_all_pairs_similarity_test(vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + vertex_t* h_first, + vertex_t* h_second, + weight_t* h_result, + size_t num_vertices, + size_t num_edges, + size_t num_pairs, + bool_t store_transposed, + bool_t use_weight, + size_t topk, + similarity_t test_type) +{ + int test_ret_value = 0; + data_type_id_t vertex_tid = INT32; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_resource_handle_t* handle = NULL; + cugraph_graph_t* graph = NULL; + cugraph_similarity_result_t* result = NULL; + cugraph_type_erased_device_array_t* vertices = NULL; + cugraph_type_erased_device_array_view_t* vertices_view = NULL; + + handle = cugraph_create_resource_handle(NULL); + TEST_ASSERT(test_ret_value, handle != NULL, "resource handle creation failed."); + + ret_code = create_test_graph( + handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, TRUE, &graph, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + switch (test_type) { + case JACCARD: + ret_code = cugraph_all_pairs_jaccard_coefficients( + handle, graph, vertices_view, use_weight, topk, FALSE, &result, &ret_error); + break; + case SORENSEN: + ret_code = cugraph_all_pairs_sorensen_coefficients( + handle, graph, vertices_view, use_weight, topk, FALSE, &result, &ret_error); + break; + case OVERLAP: + ret_code = cugraph_all_pairs_overlap_coefficients( + handle, graph, vertices_view, use_weight, topk, FALSE, &result, &ret_error); + break; + } + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph similarity failed."); + + cugraph_type_erased_device_array_view_t* 
similarity_coefficient; + + cugraph_vertex_pairs_t * vertex_pairs; + vertex_pairs = cugraph_similarity_result_get_vertex_pairs(result); + similarity_coefficient = cugraph_similarity_result_get_similarity(result); + + cugraph_type_erased_device_array_view_t *result_v1; + cugraph_type_erased_device_array_view_t *result_v2; + + result_v1 = cugraph_vertex_pairs_get_first(vertex_pairs); + result_v2 = cugraph_vertex_pairs_get_second(vertex_pairs); + size_t result_num_pairs = cugraph_type_erased_device_array_view_size(result_v1); + + TEST_ASSERT(test_ret_value, result_num_pairs == num_pairs, "Incorrect number of results"); + + vertex_t h_result_v1[result_num_pairs]; + vertex_t h_result_v2[result_num_pairs]; + weight_t h_similarity_coefficient[result_num_pairs]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_v1, result_v1, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_v2, result_v2, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_similarity_coefficient, similarity_coefficient, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + weight_t result_matrix[num_vertices][num_vertices]; + for (int i = 0 ; i < num_vertices ; ++i) + for (int j = 0 ; j < num_vertices ; ++j) + result_matrix[i][j] = 0; + + for (int i = 0 ; i < num_pairs ; ++i) + result_matrix[h_result_v1[i]][h_result_v2[i]] = h_similarity_coefficient[i]; + + for (int i = 0; (i < num_pairs) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + nearlyEqual(result_matrix[h_first[i]][h_second[i]], h_result[i], 0.001), + "similarity results don't match"); + } + + if (result != NULL) cugraph_similarity_result_free(result); + 
cugraph_sg_graph_free(graph); + cugraph_free_resource_handle(handle); + cugraph_error_free(ret_error); + + return test_ret_value; +} + int test_jaccard() { size_t num_edges = 16; @@ -296,6 +402,366 @@ int test_weighted_overlap() OVERLAP); } +int test_all_pairs_jaccard() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 22; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_first[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5}; + vertex_t h_second[] = {1, 2, 3, 4, 0, 2, 3, 5, 0, 1, 3, 4, 5, 0, 1, 2, 4, 0, 2, 3, 1, 2}; + weight_t h_result[] = {0.2,0.25,0.666667,0.333333,0.2,0.4,0.166667,0.5,0.25,0.4,0.2,0.25,0.25,0.666667,0.166667,0.2,0.666667,0.333333,0.25,0.666667,0.5,0.25}; + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + FALSE, + SIZE_MAX, + JACCARD); +} + +int test_weighted_all_pairs_jaccard() +{ + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 16; + + vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; + vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; + weight_t h_wgt[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; + + vertex_t h_first[] = {0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6}; + vertex_t h_second[] = {1, 2, 0, 2, 0, 1, 4, 5, 6, 3, 5, 6, 3, 4, 3, 4}; + weight_t h_result[] = {0.357143, 0.208333, 0.357143, 0.411765, 0.208333, 0.411765, 0.4, 0.0833333, 0.272727, 0.4, 0.222222, 0.352941, 0.0833333, 0.222222, 0.272727, 0.352941}; + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + TRUE, + SIZE_MAX, + JACCARD); +} + +int 
test_all_pairs_sorensen() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 22; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_first[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5}; + vertex_t h_second[] = {1, 2, 3, 4, 0, 2, 3, 5, 0, 1, 3, 4, 5, 0, 1, 2, 4, 0, 2, 3, 1, 2}; + weight_t h_result[] = {0.333333, 0.4, 0.8, 0.5, 0.333333, 0.571429, 0.285714, 0.666667, 0.4, 0.571429, 0.333333, 0.4, 0.4, 0.8, 0.285714, 0.333333, 0.8, 0.5, 0.4, 0.8, 0.666667, 0.4}; + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + FALSE, + SIZE_MAX, + SORENSEN); +} + +int test_weighted_all_pairs_sorensen() +{ + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 16; + + vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; + vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; + weight_t h_wgt[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; + + vertex_t h_first[] = {0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6}; + vertex_t h_second[] = {1, 2, 0, 2, 0, 1, 4, 5, 6, 3, 5, 6, 3, 4, 3, 4}; + weight_t h_result[] = {0.526316, 0.344828, 0.526316, 0.583333, 0.344828, 0.583333, 0.571429, 0.153846, 0.428571, 0.571429, 0.363636, 0.521739, 0.153846, 0.363636, 0.428571, 0.521739}; + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + TRUE, + SIZE_MAX, + SORENSEN); +} + +int test_all_pairs_overlap() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 22; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 
0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + + vertex_t h_first[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5}; + vertex_t h_second[] = {1, 2, 3, 4, 0, 2, 3, 5, 0, 1, 3, 4, 5, 0, 1, 2, 4, 0, 2, 3, 1, 2}; + weight_t h_result[] = {0.5, 0.5, 1, 0.5, 0.5, 0.666667, 0.333333, 1, 0.5, 0.666667, 0.333333, 0.5, 0.5, 1, 0.333333, 0.333333, 1, 0.5, 0.5, 1, 1, 0.5}; + + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + FALSE, + SIZE_MAX, + OVERLAP); +} + +int test_weighted_all_pairs_overlap() +{ + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 16; + + vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; + vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; + weight_t h_wgt[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; + + vertex_t h_first[] = {0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6}; + vertex_t h_second[] = {1, 2, 0, 2, 0, 1, 4, 5, 6, 3, 5, 6, 3, 4, 3, 4}; + weight_t h_result[] = {0.714286, 0.416667, 0.714286, 1, 0.416667, 1, 1, 0.166667, 0.5, 1, 0.571429, 0.75, 0.166667, 0.571429, 0.5, 0.75}; + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + TRUE, + SIZE_MAX, + OVERLAP); +} + +int test_all_pairs_jaccard_topk() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t topk = 6; + size_t num_pairs = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_first[] = {0, 1, 3, 3, 4, 5}; + vertex_t h_second[] = {3, 5, 0, 4, 3, 1}; + weight_t h_result[] = {0.666667, 0.5, 0.666667, 0.666667, 
0.666667, 0.5}; + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + FALSE, + topk, + JACCARD); +} + +int test_weighted_all_pairs_jaccard_topk() +{ + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 6; + size_t topk = 6; + + vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; + vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; + weight_t h_wgt[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; + + vertex_t h_first[] = {0, 1, 1, 2, 3, 4}; + vertex_t h_second[] = {1, 0, 2, 1, 4, 3}; + weight_t h_result[] = {0.357143, 0.357143, 0.411765, 0.411765, 0.4, 0.4}; + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + TRUE, + topk, + JACCARD); +} + +int test_all_pairs_sorensen_topk() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 6; + size_t topk = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_first[] = {0, 1, 3, 3, 4, 5}; + vertex_t h_second[] = {3, 5, 0, 4, 3, 1}; + weight_t h_result[] = {0.8, 0.666667, 0.8, 0.8, 0.8, 0.666667}; + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + FALSE, + topk, + SORENSEN); +} + +int test_weighted_all_pairs_sorensen_topk() +{ + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 6; + size_t topk = 6; + + vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; + vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; + weight_t h_wgt[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 
2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; + + vertex_t h_first[] = {0, 1, 1, 2, 3, 4}; + vertex_t h_second[] = {1, 0, 2, 1, 4, 3}; + weight_t h_result[] = {0.526316, 0.526316, 0.583333, 0.583333, 0.571429, 0.571429}; + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + TRUE, + topk, + SORENSEN); +} + +int test_all_pairs_overlap_topk() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 6; + size_t topk = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + + vertex_t h_first[] = {0, 1, 3, 3, 4, 5}; + vertex_t h_second[] = {3, 5, 0, 4, 3, 1}; + weight_t h_result[] = {1, 1, 1, 1, 1, 1}; + + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + FALSE, + topk, + OVERLAP); +} + +int test_weighted_all_pairs_overlap_topk() +{ + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 6; + size_t topk = 6; + + vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; + vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; + weight_t h_wgt[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; + + vertex_t h_first[] = {1, 2, 3, 4, 4, 6}; + vertex_t h_second[] = {2, 1, 4, 3, 6, 4}; + weight_t h_result[] = {1, 1, 1, 1, 0.75, 0.75}; + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + TRUE, + topk, + OVERLAP); +} + /******************************************************************************/ int main(int argc, char** argv) @@ -307,5 +773,17 @@ int main(int argc, char** argv) result |= 
RUN_TEST(test_weighted_jaccard); result |= RUN_TEST(test_weighted_sorensen); result |= RUN_TEST(test_weighted_overlap); + result |= RUN_TEST(test_all_pairs_jaccard); + result |= RUN_TEST(test_all_pairs_sorensen); + result |= RUN_TEST(test_all_pairs_overlap); + result |= RUN_TEST(test_weighted_all_pairs_jaccard); + result |= RUN_TEST(test_weighted_all_pairs_sorensen); + result |= RUN_TEST(test_weighted_all_pairs_overlap); + result |= RUN_TEST(test_all_pairs_jaccard_topk); + result |= RUN_TEST(test_all_pairs_sorensen_topk); + result |= RUN_TEST(test_all_pairs_overlap_topk); + result |= RUN_TEST(test_weighted_all_pairs_jaccard_topk); + result |= RUN_TEST(test_weighted_all_pairs_sorensen_topk); + result |= RUN_TEST(test_weighted_all_pairs_overlap_topk); return result; } diff --git a/cpp/tests/c_api/two_hop_neighbors_test.c b/cpp/tests/c_api/two_hop_neighbors_test.c index d47280276c5..bc95db3932b 100644 --- a/cpp/tests/c_api/two_hop_neighbors_test.c +++ b/cpp/tests/c_api/two_hop_neighbors_test.c @@ -81,8 +81,8 @@ int generic_two_hop_nbr_test(vertex_t* h_src, ret_code = cugraph_two_hop_neighbors( resource_handle, graph, start_vertices_view, FALSE, &result, &ret_error); - cugraph_type_erased_device_array_view_t* v1; - cugraph_type_erased_device_array_view_t* v2; + cugraph_type_erased_device_array_view_t const* v1; + cugraph_type_erased_device_array_view_t const* v2; v1 = cugraph_vertex_pairs_get_first(result); v2 = cugraph_vertex_pairs_get_second(result); diff --git a/cpp/tests/c_api/uniform_neighbor_sample_test.c b/cpp/tests/c_api/uniform_neighbor_sample_test.c index 92f3821e3cc..15b2e937661 100644 --- a/cpp/tests/c_api/uniform_neighbor_sample_test.c +++ b/cpp/tests/c_api/uniform_neighbor_sample_test.c @@ -333,7 +333,7 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle if (renumber_results) { size_t num_vertex_ids = 2 * (h_result_offsets[label_id+1] - h_result_offsets[label_id]); vertex_t vertex_ids[num_vertex_ids]; - + for (size_t i = 0 
; (i < (h_result_offsets[label_id+1] - h_result_offsets[label_id])) && (test_ret_value == 0) ; ++i) { vertex_ids[2*i] = h_result_srcs[h_result_offsets[label_id] + i]; vertex_ids[2*i+1] = h_result_dsts[h_result_offsets[label_id] + i]; @@ -345,7 +345,7 @@ int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle for (size_t i = 0 ; (i < num_vertex_ids) && (test_ret_value == 0) ; ++i) { if (vertex_ids[i] == current_v) ++current_v; - else + else TEST_ASSERT(test_ret_value, vertex_ids[i] == (current_v - 1), "vertices are not properly renumbered"); diff --git a/cpp/tests/community/ecg_test.cpp b/cpp/tests/community/legacy_ecg_test.cpp similarity index 98% rename from cpp/tests/community/ecg_test.cpp rename to cpp/tests/community/legacy_ecg_test.cpp index 66950b97787..c061215415a 100644 --- a/cpp/tests/community/ecg_test.cpp +++ b/cpp/tests/community/legacy_ecg_test.cpp @@ -8,8 +8,6 @@ * license agreement from NVIDIA CORPORATION is strictly prohibited. * */ -#include "utilities/base_fixture.hpp" - #include #include @@ -17,12 +15,14 @@ #include +#include + // FIXME: Temporarily disable this test. Something is wrong with // ECG, or the expectation of this test. If I run ensemble size // of 24 this fails. 
It also fails with the SG Louvain change // for PR 1271 #if 0 -TEST(ecg, success) +TEST(legacy_ecg, success) { // FIXME: verify that this is the karate dataset std::vector off_h = {0, 16, 25, 35, 41, 44, 48, 52, 56, 61, 63, 66, @@ -77,7 +77,7 @@ TEST(ecg, success) } #endif -TEST(ecg, dolphin) +TEST(legacy_ecg, dolphin) { raft::handle_t handle; diff --git a/cpp/tests/link_prediction/similarity_compare.hpp b/cpp/tests/link_prediction/similarity_compare.hpp index bbd942e2664..eed0a82fe7e 100644 --- a/cpp/tests/link_prediction/similarity_compare.hpp +++ b/cpp/tests/link_prediction/similarity_compare.hpp @@ -53,6 +53,18 @@ struct test_jaccard_t { { return cugraph::jaccard_coefficients(handle, graph_view, edge_weight_view, vertex_pairs, true); } + + template + auto run(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + bool use_weights, + std::optional topk) const + { + return cugraph::jaccard_all_pairs_coefficients( + handle, graph_view, edge_weight_view, vertices, topk); + } }; struct test_sorensen_t { @@ -82,6 +94,18 @@ struct test_sorensen_t { { return cugraph::sorensen_coefficients(handle, graph_view, edge_weight_view, vertex_pairs, true); } + + template + auto run(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + bool use_weights, + std::optional topk) const + { + return cugraph::sorensen_all_pairs_coefficients( + handle, graph_view, edge_weight_view, vertices, topk); + } }; struct test_overlap_t { @@ -111,6 +135,18 @@ struct test_overlap_t { { return cugraph::overlap_coefficients(handle, graph_view, edge_weight_view, vertex_pairs, true); } + + template + auto run(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + bool use_weights, + std::optional topk) const + { + return cugraph::overlap_all_pairs_coefficients( + handle, graph_view, 
edge_weight_view, vertices, topk); + } }; template diff --git a/cpp/tests/link_prediction/similarity_test.cu b/cpp/tests/link_prediction/similarity_test.cu new file mode 100644 index 00000000000..f5c15c760e2 --- /dev/null +++ b/cpp/tests/link_prediction/similarity_test.cu @@ -0,0 +1,352 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +struct Similarity_Usecase { + bool use_weights{false}; + bool check_correctness{true}; + bool all_pairs{false}; + std::optional max_seeds{std::nullopt}; + std::optional max_vertex_pairs_to_check{std::nullopt}; + std::optional topk{std::nullopt}; +}; + +template +class Tests_Similarity + : public ::testing::TestWithParam> { + public: + Tests_Similarity() {} + + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(std::tuple const& param, + test_functor_t const& test_functor) + { + constexpr bool renumber = true; + auto [similarity_usecase, input_usecase] = param; + + // 1. initialize handle + + raft::handle_t handle{}; + HighResTimer hr_timer{}; + + // 2.
create SG graph + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Construct graph"); + } + + auto [graph, edge_weights, d_renumber_map_labels] = + cugraph::test::construct_graph( + handle, input_usecase, similarity_usecase.use_weights, renumber); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + // 3. run similarity + + auto graph_view = graph.view(); + auto edge_weight_view = + edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Similarity test"); + } + + rmm::device_uvector v1(0, handle.get_stream()); + rmm::device_uvector v2(0, handle.get_stream()); + rmm::device_uvector result_score(0, handle.get_stream()); + + raft::random::RngState rng_state{0}; + + rmm::device_uvector sources(0, handle.get_stream()); + std::optional> sources_span{std::nullopt}; + + if (similarity_usecase.max_seeds) { + sources = cugraph::select_random_vertices( + handle, + graph_view, + std::optional>{std::nullopt}, + rng_state, + std::min(*similarity_usecase.max_seeds, + static_cast(graph_view.number_of_vertices())), + false, + false); + sources_span = raft::device_span{sources.data(), sources.size()}; + } + + if (similarity_usecase.all_pairs) { + std::tie(v1, v2, result_score) = test_functor.run(handle, + graph_view, + edge_weight_view, + sources_span, + similarity_usecase.use_weights, + similarity_usecase.topk); + } else { + if (!sources_span) { + sources.resize(graph_view.number_of_vertices(), handle.get_stream()); + thrust::sequence(handle.get_thrust_policy(), sources.begin(), sources.end(), vertex_t{0}); + sources_span = raft::device_span{sources.data(), sources.size()}; + } + + rmm::device_uvector offsets(0, 
handle.get_stream()); + + std::tie(offsets, v2) = k_hop_nbrs(handle, graph_view, *sources_span, 2, true); + + v1 = cugraph::detail::expand_sparse_offsets( + raft::device_span{offsets.data(), offsets.size()}, + vertex_t{0}, + handle.get_stream()); + + cugraph::unrenumber_local_int_vertices(handle, + v1.data(), + v1.size(), + sources.data(), + vertex_t{0}, + static_cast(sources.size()), + true); + + auto new_size = thrust::distance( + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::remove_if( + handle.get_thrust_policy(), + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::make_zip_iterator(v1.end(), v2.end()), + [] __device__(auto tuple) { return thrust::get<0>(tuple) == thrust::get<1>(tuple); })); + + v1.resize(new_size, handle.get_stream()); + v2.resize(new_size, handle.get_stream()); + + // FIXME: Need to add some tests that specify actual vertex pairs + std::tuple, raft::device_span> vertex_pairs{ + {v1.data(), v1.size()}, {v2.data(), v2.size()}}; + + result_score = test_functor.run( + handle, graph_view, edge_weight_view, vertex_pairs, similarity_usecase.use_weights); + } + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + if (similarity_usecase.check_correctness) { + auto [src, dst, wgt] = cugraph::test::graph_to_host_coo(handle, graph_view, edge_weight_view); + + size_t check_size = similarity_usecase.max_vertex_pairs_to_check + ? std::min(v1.size(), *similarity_usecase.max_vertex_pairs_to_check) + : v1.size(); + + // + // FIXME: Need to reorder here. thrust::shuffle on the tuples (vertex_pairs_1, + // vertex_pairs_2, result_score) would + // be sufficient. 
+ // + std::vector h_vertex_pair_1(check_size); + std::vector h_vertex_pair_2(check_size); + std::vector h_result_score(check_size); + + raft::update_host(h_vertex_pair_1.data(), v1.data(), check_size, handle.get_stream()); + raft::update_host(h_vertex_pair_2.data(), v2.data(), check_size, handle.get_stream()); + raft::update_host( + h_result_score.data(), result_score.data(), check_size, handle.get_stream()); + + if (similarity_usecase.use_weights) { + weighted_similarity_compare(graph_view.number_of_vertices(), + std::tie(src, dst, wgt), + std::tie(h_vertex_pair_1, h_vertex_pair_2), + h_result_score, + test_functor); + } else { + similarity_compare(graph_view.number_of_vertices(), + std::tie(src, dst, wgt), + std::tie(h_vertex_pair_1, h_vertex_pair_2), + h_result_score, + test_functor); + } + } + } +}; + +using Tests_Similarity_File = Tests_Similarity; +using Tests_Similarity_Rmat = Tests_Similarity; + +TEST_P(Tests_Similarity_File, CheckInt32Int32FloatJaccard) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int32FloatJaccard) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int64FloatJaccard) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt64Int64FloatJaccard) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_Similarity_File, CheckInt32Int32FloatSorensen) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int32FloatSorensen) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), 
cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int64FloatSorensen) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt64Int64FloatSorensen) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_Similarity_File, CheckInt32Int32FloatOverlap) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int32FloatOverlap) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int64FloatOverlap) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt64Int64FloatOverlap) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_Similarity_File, + ::testing::Combine(::testing::Values(Similarity_Usecase{false, true, false, 20, 100}, + Similarity_Usecase{false, true, false, 20, 100}, + Similarity_Usecase{false, true, false, 20, 100, 10}, + Similarity_Usecase{false, true, true, 20, 100}, + Similarity_Usecase{false, true, true, 20, 100}, + Similarity_Usecase{false, true, true, 20, 100, 10}), +#if 0 + // FIXME: See Issue #4132... 
these tests don't work for multi-graph right now + Similarity_Usecase{true, true, false, 20, 100}, + Similarity_Usecase{true, true, false, 20, 100}, + Similarity_Usecase{true, true, false, 20, 100, 10}, + Similarity_Usecase{true, true, true, 20, 100}, + Similarity_Usecase{true, true, true, 20, 100}, + Similarity_Usecase{true, true, true, 20, 100, 10}), +#endif + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_Similarity_Rmat, + ::testing::Combine( + ::testing::Values(Similarity_Usecase{false, true, false, 20, 100}, + Similarity_Usecase{false, true, false, 20, 100}, + Similarity_Usecase{false, true, false, 1000, 100, 10}, + Similarity_Usecase{false, true, true, 20, 100}, + Similarity_Usecase{false, true, true, 20, 100}, + Similarity_Usecase{false, true, true, 10000, 10000, 10}, +#if 0 + // FIXME: See Issue #4132... these tests don't work for multi-graph right now + Similarity_Usecase{true, true, true, 20, 100}, + Similarity_Usecase{true, true, true, 20, 100}, + Similarity_Usecase{true, true, false, 20, 100, 10}, + Similarity_Usecase{true, true, false, 20, 100}, + Similarity_Usecase{true, true, false, 20, 100}, + Similarity_Usecase{true, true, true, 20, 100, 10}, +#endif + Similarity_Usecase{false, true, true, std::nullopt, std::nullopt, 100}, + Similarity_Usecase{false, true, true, std::nullopt, std::nullopt, 10}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +INSTANTIATE_TEST_SUITE_P( + file_benchmark_test, /* note that the test filename can be overridden in benchmarking (with + --gtest_filter to select only the file_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one File_Usecase that differ only in filename + (to avoid running same benchmarks more than once) */ + Tests_Similarity_File, + 
::testing::Combine( + // disable correctness checks + // Disable weighted computation testing in 22.10 + //::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}), + ::testing::Values(Similarity_Usecase{false, false, false}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_Similarity_Rmat, + ::testing::Combine( + // disable correctness checks for large graphs + //::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}), + ::testing::Values(Similarity_Usecase{false, false, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/mtmg/threaded_test_louvain.cu b/cpp/tests/mtmg/threaded_test_louvain.cu index d1e12057230..ff9641d59f8 100644 --- a/cpp/tests/mtmg/threaded_test_louvain.cu +++ b/cpp/tests/mtmg/threaded_test_louvain.cu @@ -276,6 +276,7 @@ class Tests_Multithreaded std::tie(std::ignore, modularity) = cugraph::louvain( thread_handle.raft_handle(), + std::nullopt, graph_view.get(thread_handle), edge_weights ? std::make_optional(edge_weights->get(thread_handle).view()) : std::nullopt, local_louvain_clusters.data(), @@ -405,6 +406,7 @@ class Tests_Multithreaded std::tie(std::ignore, modularity) = cugraph::louvain( handle, + std::nullopt, sg_graph.view(), sg_edge_weights ? 
std::make_optional(sg_edge_weights->view()) : std::nullopt, sg_clusters.data(), diff --git a/datasets/README.md b/datasets/README.md index a23dc644081..53f180dfdc1 100644 --- a/datasets/README.md +++ b/datasets/README.md @@ -18,11 +18,11 @@ This directory contains small public datasets in `mtx` and `csv` format used by -### Modified datasets +### Modified datasets The datasets below were added to provide input that contains self-loops, string vertex IDs, isolated vertices, and multiple edges. -| Graph | V | E | Directed | Weighted | self-loops | Isolated V | String V IDs | Multi-edges | +| Graph | V | E | Directed | Weighted | self-loops | Isolated V | String V IDs | Multi-edges | | ------------------- | ------- | ---------- | -------- | --------- | ---------- | ---------- | ------------ | ----------- | | karate_multi_edge | 34 | 160 | No | Yes | No | No | No | Yes | | dolphins_multi_edge | 62 | 325 | No | Yes | No | No | No | Yes | @@ -46,22 +46,22 @@ The datasets below were added to provide input that contains self-loops, string ### Additional datasets -Larger datasets containing self-loops can be downloaded by running the provided script from the `datasets` directory using the `--self_loops` -option: +Larger datasets containing self-loops can be downloaded by running the provided script from the `datasets` directory using the `--self_loops` +option: ``` cd /datasets ./get_test_data.sh --self_loops ``` ``` /datasets/self_loops - |-ca-AstroPh (5.3M) + |-ca-AstroPh (5.3M) |-ca-CondMat (2.8M) |-ca-GrQc (348K) |-ca-HepTh (763K) ``` These datasets are not currently used by any tests or benchmarks -| Graph | V | E | Directed | Weighted | self-loops | Isolated V | String V IDs | Multi-edges | +| Graph | V | E | Directed | Weighted | self-loops | Isolated V | String V IDs | Multi-edges | | ------------- | ------- | -------- | -------- | -------- | ---------- | ---------- | ------------ | ----------- | | ca-AstroPh | 18,772 | 198,110 | No | No | Yes | No | No | No | | 
ca-CondMat | 23,133 | 93,497 | No | Yes | Yes | No | No | No | @@ -77,7 +77,7 @@ These datasets are not currently used by any tests or benchmarks **ca-HepTh** : The graph "ca-HepTh" covers scientific collaborations between authors papers submitted to High Energy Physics - Theory category in the period from January 1993 to April 2003 (124 months), as described by J. Leskovec, J. Kleinberg and C. Faloutsos in 2007. -## Custom path to larger datasets directory +## Custom path to larger datasets directory Cugraph's C++ and Python analytics tests need larger datasets (>5GB uncompressed) and reference results (>125MB uncompressed). They can be downloaded by running the provided script from the `datasets` directory. ``` @@ -133,4 +133,4 @@ _NOTE: the benchmark datasets were converted to a CSV format from their original ## Reference The SuiteSparse Matrix Collection (formerly the University of Florida Sparse Matrix Collection) : https://sparse.tamu.edu/ -The Stanford Network Analysis Platform (SNAP) +The Stanford Network Analysis Platform (SNAP) diff --git a/datasets/dolphins.mtx b/datasets/dolphins.mtx index 581940502d3..4b5bc79a180 100644 --- a/datasets/dolphins.mtx +++ b/datasets/dolphins.mtx @@ -13,25 +13,25 @@ % kind: undirected graph %------------------------------------------------------------------------------- % notes: -% Network collection from M. Newman -% http://www-personal.umich.edu/~mejn/netdata/ -% -% The graph dolphins contains an undirected social network of frequent -% associations between 62 dolphins in a community living off Doubtful Sound, -% New Zealand, as compiled by Lusseau et al. (2003). Please cite -% -% D. Lusseau, K. Schneider, O. J. Boisseau, P. Haase, E. Slooten, and +% Network collection from M. 
Newman +% http://www-personal.umich.edu/~mejn/netdata/ +% +% The graph dolphins contains an undirected social network of frequent +% associations between 62 dolphins in a community living off Doubtful Sound, +% New Zealand, as compiled by Lusseau et al. (2003). Please cite +% +% D. Lusseau, K. Schneider, O. J. Boisseau, P. Haase, E. Slooten, and % S. M. Dawson, The bottlenose dolphin community of Doubtful Sound features -% a large proportion of long-lasting associations, Behavioral Ecology and -% Sociobiology 54, 396-405 (2003). -% -% Additional information on the network can be found in -% -% D. Lusseau, The emergent properties of a dolphin social network, -% Proc. R. Soc. London B (suppl.) 270, S186-S188 (2003). -% -% D. Lusseau, Evidence for social role in a dolphin social network, -% Preprint q-bio/0607048 (http://arxiv.org/abs/q-bio.PE/0607048) +% a large proportion of long-lasting associations, Behavioral Ecology and +% Sociobiology 54, 396-405 (2003). +% +% Additional information on the network can be found in +% +% D. Lusseau, The emergent properties of a dolphin social network, +% Proc. R. Soc. London B (suppl.) 270, S186-S188 (2003). +% +% D. Lusseau, Evidence for social role in a dolphin social network, +% Preprint q-bio/0607048 (http://arxiv.org/abs/q-bio.PE/0607048) %------------------------------------------------------------------------------- 62 62 159 11 1 diff --git a/datasets/karate.mtx b/datasets/karate.mtx index 59df7607443..9ecdff42d7e 100644 --- a/datasets/karate.mtx +++ b/datasets/karate.mtx @@ -12,14 +12,14 @@ % kind: undirected graph %------------------------------------------------------------------------------- % notes: -% Network collection from M. Newman -% http://www-personal.umich.edu/~mejn/netdata/ -% -% The graph "karate" contains the network of friendships between the 34 -% members of a karate club at a US university, as described by Wayne Zachary +% Network collection from M. 
Newman +% http://www-personal.umich.edu/~mejn/netdata/ +% +% The graph "karate" contains the network of friendships between the 34 +% members of a karate club at a US university, as described by Wayne Zachary % in 1977. If you use these data in your work, please cite W. W. Zachary, An % information flow model for conflict and fission in small groups, Journal of -% Anthropological Research 33, 452-473 (1977). +% Anthropological Research 33, 452-473 (1977). %------------------------------------------------------------------------------- 34 34 78 2 1 diff --git a/datasets/karate_mod.mtx b/datasets/karate_mod.mtx index 3a562406800..072d4d9185c 100644 --- a/datasets/karate_mod.mtx +++ b/datasets/karate_mod.mtx @@ -39,7 +39,7 @@ 11 6 17 6 17 7 -31 9 +31 9 33 9 34 9 34 10 diff --git a/datasets/karate_str.mtx b/datasets/karate_str.mtx index 0564d30f91d..3708a722643 100644 --- a/datasets/karate_str.mtx +++ b/datasets/karate_str.mtx @@ -11,7 +11,7 @@ cq a9 1 ca a9 1 gd a9 1 y4 a9 1 -kx a9 1 +kx a9 1 u3 a9 1 id a9 1 ts 9q 1 diff --git a/datasets/netscience.mtx b/datasets/netscience.mtx index 71a75131995..8e1c0c1077c 100644 --- a/datasets/netscience.mtx +++ b/datasets/netscience.mtx @@ -13,23 +13,23 @@ % kind: undirected weighted graph %------------------------------------------------------------------------------- % notes: -% Network collection from M. Newman -% http://www-personal.umich.edu/~mejn/netdata/ -% -% The graph netscience contains a coauthorship network of scientists -% working on network theory and experiment, as compiled by M. Newman in May -% 2006. The network was compiled from the bibliographies of two review -% articles on networks, M. E. J. Newman, SIAM Review 45, 167-256 (2003) and -% S. Boccaletti et al., Physics Reports 424, 175-308 (2006), with a few -% additional references added by hand. The version given here contains all +% Network collection from M. 
Newman +% http://www-personal.umich.edu/~mejn/netdata/ +% +% The graph netscience contains a coauthorship network of scientists +% working on network theory and experiment, as compiled by M. Newman in May +% 2006. The network was compiled from the bibliographies of two review +% articles on networks, M. E. J. Newman, SIAM Review 45, 167-256 (2003) and +% S. Boccaletti et al., Physics Reports 424, 175-308 (2006), with a few +% additional references added by hand. The version given here contains all % components of the network, for a total of 1589 scientists, and not just the -% largest component of 379 scientists previously published. The network is -% weighted, with weights assigned as described in M. E. J. Newman, -% Phys. Rev. E 64, 016132 (2001). -% -% If you make use of these data, please cite M. E. J. Newman, Finding -% community structure in networks using the eigenvectors of matrices, -% Preprint physics/0605087 (2006). +% largest component of 379 scientists previously published. The network is +% weighted, with weights assigned as described in M. E. J. Newman, +% Phys. Rev. E 64, 016132 (2001). +% +% If you make use of these data, please cite M. E. J. Newman, Finding +% community structure in networks using the eigenvectors of matrices, +% Preprint physics/0605087 (2006). %------------------------------------------------------------------------------- 1589 1589 2742 2 1 2.5 diff --git a/datasets/polbooks.mtx b/datasets/polbooks.mtx index 65266677b39..30479086d0a 100644 --- a/datasets/polbooks.mtx +++ b/datasets/polbooks.mtx @@ -13,24 +13,24 @@ % kind: undirected graph %------------------------------------------------------------------------------- % notes: -% Network collection from M. Newman -% http://www-personal.umich.edu/~mejn/netdata/ -% -% Books about US politics -% Compiled by Valdis Krebs -% -% Nodes represent books about US politics sold by the online bookseller -% Amazon.com. 
Edges represent frequent co-purchasing of books by the same -% buyers, as indicated by the "customers who bought this book also bought -% these other books" feature on Amazon. -% +% Network collection from M. Newman +% http://www-personal.umich.edu/~mejn/netdata/ +% +% Books about US politics +% Compiled by Valdis Krebs +% +% Nodes represent books about US politics sold by the online bookseller +% Amazon.com. Edges represent frequent co-purchasing of books by the same +% buyers, as indicated by the "customers who bought this book also bought +% these other books" feature on Amazon. +% % Nodes have been given values "l", "n", or "c" to indicate whether they are -% "liberal", "neutral", or "conservative". These alignments were assigned -% separately by Mark Newman based on a reading of the descriptions and -% reviews of the books posted on Amazon. -% -% These data should be cited as V. Krebs, unpublished, -% http://www.orgnet.com/. +% "liberal", "neutral", or "conservative". These alignments were assigned +% separately by Mark Newman based on a reading of the descriptions and +% reviews of the books posted on Amazon. +% +% These data should be cited as V. Krebs, unpublished, +% http://www.orgnet.com/. 
%------------------------------------------------------------------------------- 105 105 441 2 1 diff --git a/datasets/small_line.csv b/datasets/small_line.csv index 68751f432a2..55494314212 100644 --- a/datasets/small_line.csv +++ b/datasets/small_line.csv @@ -6,4 +6,4 @@ 5 6 1.0 6 7 1.0 7 8 1.0 -8 9 1.0 \ No newline at end of file +8 9 1.0 diff --git a/datasets/toy_graph.csv b/datasets/toy_graph.csv index 02069f5e153..cb07d7b1ff9 100644 --- a/datasets/toy_graph.csv +++ b/datasets/toy_graph.csv @@ -13,4 +13,4 @@ 1 2 3.1 3 2 4.1 5 3 7.2 -5 4 3.2 \ No newline at end of file +5 4 3.2 diff --git a/datasets/toy_graph_undirected.csv b/datasets/toy_graph_undirected.csv index dec4a956c85..cafd02ae285 100644 --- a/datasets/toy_graph_undirected.csv +++ b/datasets/toy_graph_undirected.csv @@ -5,4 +5,4 @@ 2 1 3.1 2 3 4.1 3 5 7.2 -4 5 3.2 \ No newline at end of file +4 5 3.2 diff --git a/docs/cugraph/Makefile b/docs/cugraph/Makefile index f92d0be6910..ac16367ef52 100644 --- a/docs/cugraph/Makefile +++ b/docs/cugraph/Makefile @@ -23,4 +23,4 @@ clean: # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/cugraph/README.md b/docs/cugraph/README.md index 734867af965..970a03a040c 100644 --- a/docs/cugraph/README.md +++ b/docs/cugraph/README.md @@ -5,7 +5,7 @@ All prerequisite for building docs are in the cugraph development conda environm ## Steps to follow: -In order to build the docs, we need the conda dev environment from cugraph and we need to build cugraph from source. +In order to build the docs, we need the conda dev environment from cugraph and we need to build cugraph from source. 1. Create a conda env and build cugraph from source. 
The dependencies to build rapids from source are installed in that conda environment, and then rapids is built and installed into the same environment. diff --git a/docs/cugraph/source/_static/references.css b/docs/cugraph/source/_static/references.css index 225cf13ba94..d1f647233a8 100644 --- a/docs/cugraph/source/_static/references.css +++ b/docs/cugraph/source/_static/references.css @@ -20,4 +20,4 @@ dl.citation > dt.label > span::before { /* Add closing bracket */ dl.citation > dt.label > span::after { content: "]"; -} \ No newline at end of file +} diff --git a/docs/cugraph/source/api_docs/cugraph-ops/c_cpp/index.rst b/docs/cugraph/source/api_docs/cugraph-ops/c_cpp/index.rst index 5545bebe975..39dae955ef3 100644 --- a/docs/cugraph/source/api_docs/cugraph-ops/c_cpp/index.rst +++ b/docs/cugraph/source/api_docs/cugraph-ops/c_cpp/index.rst @@ -1,3 +1,2 @@ cugraph-ops C++ API Reference ============================= - diff --git a/docs/cugraph/source/api_docs/cugraph-ops/index.rst b/docs/cugraph/source/api_docs/cugraph-ops/index.rst index fdfd5baab96..0f6a6c937d3 100644 --- a/docs/cugraph/source/api_docs/cugraph-ops/index.rst +++ b/docs/cugraph/source/api_docs/cugraph-ops/index.rst @@ -8,4 +8,4 @@ This page provides a list of all publicly accessible modules, methods and classe :caption: API Documentation python/index - c_cpp/index \ No newline at end of file + c_cpp/index diff --git a/docs/cugraph/source/api_docs/cugraph-ops/python/index.rst b/docs/cugraph/source/api_docs/cugraph-ops/python/index.rst index 082c7741f23..fb25f2fa005 100644 --- a/docs/cugraph/source/api_docs/cugraph-ops/python/index.rst +++ b/docs/cugraph/source/api_docs/cugraph-ops/python/index.rst @@ -1,5 +1,5 @@ cugraph-ops Python API reference -================================ +================================ This page provides a list of all publicly accessible modules, methods and classes through `pylibcugraphops.*` namespace. 
diff --git a/docs/cugraph/source/api_docs/cugraph-pyg/cugraph_pyg.rst b/docs/cugraph/source/api_docs/cugraph-pyg/cugraph_pyg.rst index f7d7f5f2262..a150d4db9fe 100644 --- a/docs/cugraph/source/api_docs/cugraph-pyg/cugraph_pyg.rst +++ b/docs/cugraph/source/api_docs/cugraph-pyg/cugraph_pyg.rst @@ -11,4 +11,3 @@ cugraph-pyg .. cugraph_pyg.data.cugraph_store.EXPERIMENTAL__CuGraphStore .. cugraph_pyg.sampler.cugraph_sampler.EXPERIMENTAL__CuGraphSampler - diff --git a/docs/cugraph/source/api_docs/cugraph/components.rst b/docs/cugraph/source/api_docs/cugraph/components.rst index 560aa1f8ca0..e61291fccf0 100644 --- a/docs/cugraph/source/api_docs/cugraph/components.rst +++ b/docs/cugraph/source/api_docs/cugraph/components.rst @@ -20,4 +20,3 @@ Connected Components (MG) :toctree: ../api/cugraph/ cugraph.dask.components.connectivity.weakly_connected_components - diff --git a/docs/cugraph/source/api_docs/cugraph/dask-cugraph.rst b/docs/cugraph/source/api_docs/cugraph/dask-cugraph.rst index a2d49789053..d9ba3f3a792 100644 --- a/docs/cugraph/source/api_docs/cugraph/dask-cugraph.rst +++ b/docs/cugraph/source/api_docs/cugraph/dask-cugraph.rst @@ -48,12 +48,12 @@ Example ) input_data.to_csv(input_data_path, index=False) - # helper function to set the reader chunk size to automatically get one partition per GPU + # helper function to set the reader chunk size to automatically get one partition per GPU chunksize = dask_cugraph.get_chunksize(input_data_path) # multi-GPU CSV reader e_list = dask_cudf.read_csv( - input_data_path, + input_data_path, chunksize=chunksize, names=['src', 'dst'], dtype=['int32', 'int32'], diff --git a/docs/cugraph/source/api_docs/cugraph/generators.rst b/docs/cugraph/source/api_docs/cugraph/generators.rst index 4f93d943e6c..f5180a172cd 100644 --- a/docs/cugraph/source/api_docs/cugraph/generators.rst +++ b/docs/cugraph/source/api_docs/cugraph/generators.rst @@ -11,4 +11,3 @@ RMAT :toctree: ../api/cugraph/ cugraph.generators.rmat - diff --git 
a/docs/cugraph/source/api_docs/cugraph/graph_implementation.rst b/docs/cugraph/source/api_docs/cugraph/graph_implementation.rst index 91c16c24248..ae14306ce27 100644 --- a/docs/cugraph/source/api_docs/cugraph/graph_implementation.rst +++ b/docs/cugraph/source/api_docs/cugraph/graph_implementation.rst @@ -31,4 +31,4 @@ Graph Implementation edges nodes neighbors - vertex_column_size \ No newline at end of file + vertex_column_size diff --git a/docs/cugraph/source/api_docs/cugraph/helper_functions.rst b/docs/cugraph/source/api_docs/cugraph/helper_functions.rst index 02cb599ae55..e7091c50c44 100644 --- a/docs/cugraph/source/api_docs/cugraph/helper_functions.rst +++ b/docs/cugraph/source/api_docs/cugraph/helper_functions.rst @@ -20,4 +20,3 @@ Methods cugraph.dask.comms.comms.get_handle cugraph.dask.comms.comms.get_worker_id cugraph.dask.common.read_utils.get_chunksize - diff --git a/docs/cugraph/source/api_docs/cugraph/layout.rst b/docs/cugraph/source/api_docs/cugraph/layout.rst index ed97caf549f..d416676a62e 100644 --- a/docs/cugraph/source/api_docs/cugraph/layout.rst +++ b/docs/cugraph/source/api_docs/cugraph/layout.rst @@ -10,4 +10,3 @@ Force Atlas 2 :toctree: ../api/cugraph/ cugraph.force_atlas2 - diff --git a/docs/cugraph/source/api_docs/cugraph/link_analysis.rst b/docs/cugraph/source/api_docs/cugraph/link_analysis.rst index 48b5ec1176f..698880c1b9e 100644 --- a/docs/cugraph/source/api_docs/cugraph/link_analysis.rst +++ b/docs/cugraph/source/api_docs/cugraph/link_analysis.rst @@ -32,4 +32,3 @@ Pagerank (MG) :toctree: ../api/cugraph/ cugraph.dask.link_analysis.pagerank.pagerank - diff --git a/docs/cugraph/source/api_docs/cugraph/tree.rst b/docs/cugraph/source/api_docs/cugraph/tree.rst index 5ba2242ebb6..d8a89046aa3 100644 --- a/docs/cugraph/source/api_docs/cugraph/tree.rst +++ b/docs/cugraph/source/api_docs/cugraph/tree.rst @@ -18,4 +18,3 @@ Maximum Spanning Tree :toctree: ../api/cugraph/ cugraph.tree.minimum_spanning_tree.maximum_spanning_tree - diff --git 
a/docs/cugraph/source/api_docs/cugraph_c/centrality.rst b/docs/cugraph/source/api_docs/cugraph_c/centrality.rst index f34e26ad76e..3bea608fd5a 100644 --- a/docs/cugraph/source/api_docs/cugraph_c/centrality.rst +++ b/docs/cugraph/source/api_docs/cugraph_c/centrality.rst @@ -48,4 +48,3 @@ Centrality Support Functions :project: libcugraph :members: :content-only: - diff --git a/docs/cugraph/source/api_docs/cugraph_c/community.rst b/docs/cugraph/source/api_docs/cugraph_c/community.rst index d55325720c4..6b500a972a7 100644 --- a/docs/cugraph/source/api_docs/cugraph_c/community.rst +++ b/docs/cugraph/source/api_docs/cugraph_c/community.rst @@ -54,4 +54,3 @@ Community Support Functions :project: libcugraph :members: :content-only: - diff --git a/docs/cugraph/source/api_docs/cugraph_c/labeling.rst b/docs/cugraph/source/api_docs/cugraph_c/labeling.rst index 4ca598c0a06..2b709ebd343 100644 --- a/docs/cugraph/source/api_docs/cugraph_c/labeling.rst +++ b/docs/cugraph/source/api_docs/cugraph_c/labeling.rst @@ -17,4 +17,4 @@ Labeling Support Functions .. doxygengroup:: labeling :project: libcugraph :members: - :content-only: \ No newline at end of file + :content-only: diff --git a/docs/cugraph/source/api_docs/cugraph_c/similarity.rst b/docs/cugraph/source/api_docs/cugraph_c/similarity.rst index 200ba695781..75735925e4d 100644 --- a/docs/cugraph/source/api_docs/cugraph_c/similarity.rst +++ b/docs/cugraph/source/api_docs/cugraph_c/similarity.rst @@ -22,4 +22,4 @@ Similarty Support Functions .. doxygengroup:: similarity :project: libcugraph :members: - :content-only: \ No newline at end of file + :content-only: diff --git a/docs/cugraph/source/api_docs/cugraph_c/traversal.rst b/docs/cugraph/source/api_docs/cugraph_c/traversal.rst index 1578951e05f..bde30f4fa6e 100644 --- a/docs/cugraph/source/api_docs/cugraph_c/traversal.rst +++ b/docs/cugraph/source/api_docs/cugraph_c/traversal.rst @@ -27,4 +27,4 @@ Traversal Support Functions .. 
doxygengroup:: traversal :project: libcugraph :members: - :content-only: \ No newline at end of file + :content-only: diff --git a/docs/cugraph/source/api_docs/index.rst b/docs/cugraph/source/api_docs/index.rst index 74ca98bb98d..1b907165a39 100644 --- a/docs/cugraph/source/api_docs/index.rst +++ b/docs/cugraph/source/api_docs/index.rst @@ -35,5 +35,3 @@ Additional Graph Packages API Documentation :caption: Additional Graph Packages API Documentation service/index.rst - - diff --git a/docs/cugraph/source/api_docs/service/index.rst b/docs/cugraph/source/api_docs/service/index.rst index a58cf207456..ca251e475d4 100644 --- a/docs/cugraph/source/api_docs/service/index.rst +++ b/docs/cugraph/source/api_docs/service/index.rst @@ -7,4 +7,4 @@ cugraph-service API Reference :caption: cugraph-service API Documentation cugraph_service_client - cugraph_service_server \ No newline at end of file + cugraph_service_server diff --git a/docs/cugraph/source/basics/cugraph_cascading.md b/docs/cugraph/source/basics/cugraph_cascading.md index 7795e9e007a..bad3d7fa6a8 100644 --- a/docs/cugraph/source/basics/cugraph_cascading.md +++ b/docs/cugraph/source/basics/cugraph_cascading.md @@ -3,7 +3,7 @@ BLUF: cuGraph does not support method cascading -[Method Cascading](https://en.wikipedia.org/wiki/Method_cascading) is a popular, and useful, functional programming concept and is a great way to make code more readable. Python supports method cascading ... _for the most part_. There are a number of Python built-in classes that do not support cascading. +[Method Cascading](https://en.wikipedia.org/wiki/Method_cascading) is a popular, and useful, functional programming concept and is a great way to make code more readable. Python supports method cascading ... _for the most part_. There are a number of Python built-in classes that do not support cascading. 
An example, from cuDF, is a sequence of method calls for loading data and then finding the largest values from a subset of the data (yes there are other ways this could be done): @@ -11,13 +11,13 @@ An example, from cuDF, is a sequence of method calls for loading data and then gdf = cudf.from_pandas(df).query(‘val > 200’).nlargest(‘va’3) ``` -cuGraph does not support method cascading for two main reasons: (1) the object-oriented nature of the Graph data object leverages in-place methods, and (2) the fact that algorithms operate on graphs rather than graphs running algorithms. +cuGraph does not support method cascading for two main reasons: (1) the object-oriented nature of the Graph data object leverages in-place methods, and (2) the fact that algorithms operate on graphs rather than graphs running algorithms. ## Graph Data Objects -cuGraph follows an object-oriented design for the Graph objects. Users create a Graph and can then add data to object, but every add method call returns `None`. +cuGraph follows an object-oriented design for the Graph objects. Users create a Graph and can then add data to object, but every add method call returns `None`. _Why Inplace methods?_
-cuGraph focuses on the big graph problems where there are 10s of millions to trillions of edges (Giga bytes to Terabytes of data). At that scale, creating a copy of the data becomes memory inefficient. +cuGraph focuses on the big graph problems where there are 10s of millions to trillions of edges (Giga bytes to Terabytes of data). At that scale, creating a copy of the data becomes memory inefficient. _Why not return `self` rather than `None`?_
It would be simple to modify the methods to return `self` rather than `None`, however it opens the methods to misinterpretation. Consider the following code: diff --git a/docs/cugraph/source/basics/cugraph_intro.md b/docs/cugraph/source/basics/cugraph_intro.md index 10d14f8a0d7..7ad2825604a 100644 --- a/docs/cugraph/source/basics/cugraph_intro.md +++ b/docs/cugraph/source/basics/cugraph_intro.md @@ -1,51 +1,51 @@ # cuGraph Introduction -The Data Scientist has a collection of techniques within their -proverbial toolbox. Data engineering, statistical analysis, and -machine learning are among the most commonly known. However, there -are numerous cases where the focus of the analysis is on the -relationship between data elements. In those cases, the data is best -represented as a graph. Graph analysis, also called network analysis, -is a collection of algorithms for answering questions posed against +The Data Scientist has a collection of techniques within their +proverbial toolbox. Data engineering, statistical analysis, and +machine learning are among the most commonly known. However, there +are numerous cases where the focus of the analysis is on the +relationship between data elements. In those cases, the data is best +represented as a graph. Graph analysis, also called network analysis, +is a collection of algorithms for answering questions posed against graph data. Graph analysis is not new. -The first graph problem was posed by Euler in 1736, the [Seven Bridges of -Konigsberg](https://en.wikipedia.org/wiki/Seven_Bridges_of_K%C3%B6nigsberg), -and laid the foundation for the mathematical field of graph theory. -The application of graph analysis covers a wide variety of fields, including +The first graph problem was posed by Euler in 1736, the [Seven Bridges of +Konigsberg](https://en.wikipedia.org/wiki/Seven_Bridges_of_K%C3%B6nigsberg), +and laid the foundation for the mathematical field of graph theory. 
+The application of graph analysis covers a wide variety of fields, including marketing, biology, physics, computer science, sociology, and cyber to name a few. -RAPIDS cuGraph is a library of graph algorithms that seamlessly integrates -into the RAPIDS data science ecosystem and allows the data scientist to easily -call graph algorithms using data stored in a GPU DataFrame, NetworkX Graphs, or even -CuPy or SciPy sparse Matrix. +RAPIDS cuGraph is a library of graph algorithms that seamlessly integrates +into the RAPIDS data science ecosystem and allows the data scientist to easily +call graph algorithms using data stored in a GPU DataFrame, NetworkX Graphs, or even +CuPy or SciPy sparse Matrix. ## Vision -The vision of RAPIDS cuGraph is to ___make graph analysis ubiquitous to the -point that users just think in terms of analysis and not technologies or -frameworks___. This is a goal that many of us on the cuGraph team have been -working on for almost twenty years. Many of the early attempts focused on -solving one problem or using one technique. Those early attempts worked for -the initial goal but tended to break as the scope changed (e.g., shifting -to solving a dynamic graph problem with a static graph solution). The limiting -factors usually came down to compute power, ease-of-use, or choosing a data -structure that was not suited for all problems. NVIDIA GPUs, CUDA, and RAPIDS -have totally changed the paradigm and the goal of an accelerated unified graph +The vision of RAPIDS cuGraph is to ___make graph analysis ubiquitous to the +point that users just think in terms of analysis and not technologies or +frameworks___. This is a goal that many of us on the cuGraph team have been +working on for almost twenty years. Many of the early attempts focused on +solving one problem or using one technique. 
Those early attempts worked for +the initial goal but tended to break as the scope changed (e.g., shifting +to solving a dynamic graph problem with a static graph solution). The limiting +factors usually came down to compute power, ease-of-use, or choosing a data +structure that was not suited for all problems. NVIDIA GPUs, CUDA, and RAPIDS +have totally changed the paradigm and the goal of an accelerated unified graph analytic library is now possible. -The compute power of the latest NVIDIA GPUs (RAPIDS supports Pascal and later -GPU architectures) make graph analytics 1000x faster on average over NetworkX. -Moreover, the internal memory speed within a GPU allows cuGraph to rapidly -switch the data structure to best suit the needs of the analytic rather than -being restricted to a single data structure. cuGraph is working with several -frameworks for both static and dynamic graph data structures so that we always -have a solution to any graph problem. Since Python has emerged as the de facto -language for data science, allowing interactivity and the ability to run graph -analytics in Python makes cuGraph familiar and approachable. RAPIDS wraps all -the graph analytic goodness mentioned above with the ability to perform -high-speed ETL, statistics, and machine learning. To make things even better, -RAPIDS and DASK allows cuGraph to scale to multiple GPUs to support +The compute power of the latest NVIDIA GPUs (RAPIDS supports Pascal and later +GPU architectures) make graph analytics 1000x faster on average over NetworkX. +Moreover, the internal memory speed within a GPU allows cuGraph to rapidly +switch the data structure to best suit the needs of the analytic rather than +being restricted to a single data structure. cuGraph is working with several +frameworks for both static and dynamic graph data structures so that we always +have a solution to any graph problem. 
Since Python has emerged as the de facto +language for data science, allowing interactivity and the ability to run graph +analytics in Python makes cuGraph familiar and approachable. RAPIDS wraps all +the graph analytic goodness mentioned above with the ability to perform +high-speed ETL, statistics, and machine learning. To make things even better, +RAPIDS and DASK allows cuGraph to scale to multiple GPUs to support multi-billion edge graphs. @@ -65,9 +65,4 @@ possible. However, each field has its own argot (jargon) for describing the graph (or network). In our documentation, we try to be consistent. In Python documentation we will mostly use the terms __Node__ and __Edge__ to better match NetworkX preferred term use, as well as other Python-based tools. At -the CUDA/C layer, we favor the mathematical terms of __Vertex__ and __Edge__. - - - - - \ No newline at end of file +the CUDA/C layer, we favor the mathematical terms of __Vertex__ and __Edge__. diff --git a/docs/cugraph/source/basics/nx_transition.rst b/docs/cugraph/source/basics/nx_transition.rst index 3d116162c09..9849865814d 100644 --- a/docs/cugraph/source/basics/nx_transition.rst +++ b/docs/cugraph/source/basics/nx_transition.rst @@ -2,7 +2,7 @@ NetworkX Compatibility and Transition ************************************** -*Note: this is a work in progress and will be updatred and changed as we better flesh out +*Note: this is a work in progress and will be updatred and changed as we better flesh out compatibility issues* One of the goals of RAPIDS cuGraph is to mimic the NetworkX API to simplify @@ -13,7 +13,7 @@ and then the construction of a graph object; that is all before the execution of a graph algorithm. RAPIDS and cuGraph allow a portion or the complete analytic workflow to be accelerated. To achieve the maximum amount of acceleration, we encourage fully replacing existing code with cuGraph. -But sometimes it is easier to replace just a portion. 
+But sometimes it is easier to replace just a portion. Last Update ########### @@ -23,7 +23,7 @@ Release: 0.16 Information on `NetworkX `_ -This transition guide in an expansion of the Medium Blog on `NetworkX Compatibility +This transition guide in an expansion of the Medium Blog on `NetworkX Compatibility `_ @@ -33,7 +33,7 @@ Easy Path – Use NetworkX Graph Objects, Accelerated Algorithms Rather than updating all of your existing code, simply update the calls to graph algorithms by replacing the module name. This allows all the complicated ETL code to be unchanged while still seeing significate performance -improvements. +improvements. In the following example, the cuGraph module is being imported as “cnx”. While module can be assigned any name can be used, we picked cnx to reduce @@ -44,10 +44,10 @@ changes. :width: 600 It is that easy. All algorithms in cuGraph support a NetworkX graph object as -input and match the NetworkX API list of arguments. +input and match the NetworkX API list of arguments. Currently, cuGraph accepts both NetworkX Graph and DiGraph objects. We will be -adding support for Bipartite graph and Multigraph over the next few releases. +adding support for Bipartite graph and Multigraph over the next few releases. | @@ -58,7 +58,7 @@ Differences in Algorithms Since cuGraph currently does not support attribute rich graphs, those algorithms that return simple scores (centrality, clustering, etc.) best match the NetworkX process. Algorithms that return a subgraph will do so without -any additional attributes on the nodes or edges. +any additional attributes on the nodes or edges. Algorithms that exactly match ***************************** @@ -142,8 +142,8 @@ Algorithms where the results are different ****************************************** -For example, the NetworkX traversal algorithms typically return a generator -rather than a dictionary. +For example, the NetworkX traversal algorithms typically return a generator +rather than a dictionary. 
+----------------------------+-------------------------------------------------+ @@ -154,7 +154,7 @@ rather than a dictionary. | | (on roadmap to update) | +----------------------------+-------------------------------------------------+ | Jaccard coefficient | Currently we only do a 1-hop computation rather | -| | than an all-pairs. Fix is on roadmap | +| | than an all-pairs. Fix is on roadmap | +----------------------------+-------------------------------------------------+ | Breadth First Search (BFS) | Returns a Pandas DataFrame with: | | | [vertex][distance][predecessor] | @@ -185,14 +185,13 @@ code for building a NetworkX Graph:: ) -The code block is perfectly fine for NetworkX. However, the process of iterating over the dataframe and adding one node at a time is problematic for GPUs and something that we try and avoid. cuGraph stores data in columns (i.e. arrays). Resizing an array requires allocating a new array one element larger, copying the data, and adding the new value. That is not very efficient. +The code block is perfectly fine for NetworkX. However, the process of iterating over the dataframe and adding one node at a time is problematic for GPUs and something that we try and avoid. cuGraph stores data in columns (i.e. arrays). Resizing an array requires allocating a new array one element larger, copying the data, and adding the new value. That is not very efficient. -If your code follows the above model of inserting one element at a time, the we suggest either rewriting that code or using it as is within NetworkX and just accelerating the algorithms with cuGraph. +If your code follows the above model of inserting one element at a time, the we suggest either rewriting that code or using it as is within NetworkX and just accelerating the algorithms with cuGraph. Now, if your code bulk loads the data from Pandas, then RAPIDS can accelerate that process by orders of magnitude. .. 
image:: ../images/Nx_Cg_2.png :width: 600 -The above cuGraph code will create cuGraph.Graph object and not a NetworkX.Graph object. - +The above cuGraph code will create cuGraph.Graph object and not a NetworkX.Graph object. diff --git a/docs/cugraph/source/dev_resources/API.rst b/docs/cugraph/source/dev_resources/API.rst index 409e307fd9f..e32315d2fb9 100644 --- a/docs/cugraph/source/dev_resources/API.rst +++ b/docs/cugraph/source/dev_resources/API.rst @@ -2,4 +2,4 @@ API === -https://docs.rapids.ai/api/cugraph/nightly/api_docs/index.html \ No newline at end of file +https://docs.rapids.ai/api/cugraph/nightly/api_docs/index.html diff --git a/docs/cugraph/source/dev_resources/index.rst b/docs/cugraph/source/dev_resources/index.rst index 8568772b35c..fc2c4f4780f 100644 --- a/docs/cugraph/source/dev_resources/index.rst +++ b/docs/cugraph/source/dev_resources/index.rst @@ -7,4 +7,4 @@ Developer Resources :maxdepth: 3 https://docs.rapids.ai/maintainers - API.rst \ No newline at end of file + API.rst diff --git a/docs/cugraph/source/graph_support/DGL_support.md b/docs/cugraph/source/graph_support/DGL_support.md index fc1f1b45bde..dc4f66180ac 100644 --- a/docs/cugraph/source/graph_support/DGL_support.md +++ b/docs/cugraph/source/graph_support/DGL_support.md @@ -2,7 +2,7 @@ ## Description -[RAPIDS](https://rapids.ai) cugraph_dgl provides a duck-typed version of the [DGLGraph](https://docs.dgl.ai/api/python/dgl.DGLGraph.html#dgl.DGLGraph) class, which uses cugraph for storing graph structure and node/edge feature data. Using cugraph as the backend allows DGL users to access a collection of GPU accelerated algorithms for graph analytics, such as centrality computation and community detection. +[RAPIDS](https://rapids.ai) cugraph_dgl provides a duck-typed version of the [DGLGraph](https://docs.dgl.ai/api/python/dgl.DGLGraph.html#dgl.DGLGraph) class, which uses cugraph for storing graph structure and node/edge feature data. 
Using cugraph as the backend allows DGL users to access a collection of GPU accelerated algorithms for graph analytics, such as centrality computation and community detection. ## Conda @@ -22,7 +22,7 @@ mamba env create -n cugraph_dgl_dev --file conda/cugraph_dgl_dev_11.6.yml ### Install in editable mode ``` -pip install -e . +pip install -e . ``` ### Run tests @@ -43,12 +43,12 @@ sampler = dgl.dataloading.NeighborSampler( train_dataloader = dgl.dataloading.DataLoader( cugraph_g, -train_idx, -sampler, -device=device, +train_idx, +sampler, +device=device, batch_size=1024, shuffle=True, -drop_last=False, +drop_last=False, num_workers=0) ``` diff --git a/docs/cugraph/source/graph_support/PyG_support.md b/docs/cugraph/source/graph_support/PyG_support.md index 42d4d1c5506..b57ce7fcc6f 100644 --- a/docs/cugraph/source/graph_support/PyG_support.md +++ b/docs/cugraph/source/graph_support/PyG_support.md @@ -1,3 +1,3 @@ # cugraph_pyg -[RAPIDS](https://rapids.ai) cugraph_pyg enables the ability to use cugraph graph storage and sampling with PyTorch Geometric (PyG). PyG users will have access to cuGraph through the PyG GraphStore, FeatureStore, and Sampler interfaces. \ No newline at end of file +[RAPIDS](https://rapids.ai) cugraph_pyg enables the ability to use cugraph graph storage and sampling with PyTorch Geometric (PyG). PyG users will have access to cuGraph through the PyG GraphStore, FeatureStore, and Sampler interfaces. diff --git a/docs/cugraph/source/graph_support/algorithms.md b/docs/cugraph/source/graph_support/algorithms.md index a1b80e92751..8a5158f2f56 100644 --- a/docs/cugraph/source/graph_support/algorithms.md +++ b/docs/cugraph/source/graph_support/algorithms.md @@ -92,4 +92,4 @@ Copyright (c) 2019 - 2023, NVIDIA CORPORATION. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -___ \ No newline at end of file +___ diff --git a/docs/cugraph/source/graph_support/algorithms/Similarity.md b/docs/cugraph/source/graph_support/algorithms/Similarity.md index 18c0a94d519..96adc25ea69 100644 --- a/docs/cugraph/source/graph_support/algorithms/Similarity.md +++ b/docs/cugraph/source/graph_support/algorithms/Similarity.md @@ -5,8 +5,8 @@ The RAPIDS cuGraph Similarity folder contain a collection of Jupyter Notebooks t Results of Similarity algorithms are often used to answer questions like: * Could two vertices be duplicates or aliases of the same actor? * Can we predict missing edges based of the similarity between two nodes? -* Are multiple similar communities within the graph? -* Can I create recommendations based on the similarity between vertices in the graph. +* Are multiple similar communities within the graph? +* Can I create recommendations based on the similarity between vertices in the graph. Manipulation of the data before or after the graph analytic is not covered here. Extended, more problem focused, notebooks are being created and available https://github.com/rapidsai/notebooks-extended @@ -35,4 +35,4 @@ Copyright (c) 2019 - 2023, NVIDIA CORPORATION. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -___ \ No newline at end of file +___ diff --git a/docs/cugraph/source/graph_support/algorithms/betweenness_centrality.md b/docs/cugraph/source/graph_support/algorithms/betweenness_centrality.md index 7dc692684aa..89e5e0bdb92 100644 --- a/docs/cugraph/source/graph_support/algorithms/betweenness_centrality.md +++ b/docs/cugraph/source/graph_support/algorithms/betweenness_centrality.md @@ -39,4 +39,4 @@ Copyright (c) 2023, NVIDIA CORPORATION. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -___ \ No newline at end of file +___ diff --git a/docs/cugraph/source/graph_support/algorithms/degree_centrality.md b/docs/cugraph/source/graph_support/algorithms/degree_centrality.md index 64fe694e4c1..9253c665fb0 100644 --- a/docs/cugraph/source/graph_support/algorithms/degree_centrality.md +++ b/docs/cugraph/source/graph_support/algorithms/degree_centrality.md @@ -11,7 +11,7 @@ Degree centrality of a vertex 𝑣 is the sum of the edges incident on that node * When you need a really quick identifcation of important nodes on very simply structured data. * In cases like collaboration networks where all links have equal importance. 
* In many biologic and transportation networks, shear number of connections is important to itentify critical nodes whether they be proteins or airports. -* In huge graphs, Degree centrality is a the quickest +* In huge graphs, Degree centrality is a the quickest ## When not to use Degree Centrality * When weights, edge direction or edge types matter @@ -29,4 +29,4 @@ Copyright (c) 2023, NVIDIA CORPORATION. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -___ \ No newline at end of file +___ diff --git a/docs/cugraph/source/graph_support/algorithms/eigenvector_centrality.md b/docs/cugraph/source/graph_support/algorithms/eigenvector_centrality.md index 5217dc497db..8a9c7c7c767 100644 --- a/docs/cugraph/source/graph_support/algorithms/eigenvector_centrality.md +++ b/docs/cugraph/source/graph_support/algorithms/eigenvector_centrality.md @@ -18,11 +18,11 @@ Where M(v) is the adjacency list for the set of vertices(v) and λ is a constant ## When to use Eigenvector Centrality * When the quality and quantity of edges matters, in other words, connections to other high-degree nodes is important -* To calculate influence in nuanced networks like social and financial networks. +* To calculate influence in nuanced networks like social and financial networks. 
## When not to use Eigenvector Centrality * in graphs with many disconnected groups -* in graphs containing many distinct and different communities +* in graphs containing many distinct and different communities * in networks with negative weights * in huge networks eigenvector centrality can become computationally infeasible in single threaded systems. @@ -39,4 +39,3 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ___ - diff --git a/docs/cugraph/source/graph_support/algorithms/jaccard_similarity.md b/docs/cugraph/source/graph_support/algorithms/jaccard_similarity.md index c9ffc76d182..dde98d71ea3 100644 --- a/docs/cugraph/source/graph_support/algorithms/jaccard_similarity.md +++ b/docs/cugraph/source/graph_support/algorithms/jaccard_similarity.md @@ -1,6 +1,6 @@ # Jaccard Similarity -The Jaccard similarity between two sets is defined as the ratio of the volume of their intersection divided by the volume of their union. +The Jaccard similarity between two sets is defined as the ratio of the volume of their intersection divided by the volume of their union. The Jaccard Similarity can then be defined as @@ -31,4 +31,4 @@ Copyright (c) 2023, NVIDIA CORPORATION. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. -___ \ No newline at end of file +___ diff --git a/docs/cugraph/source/graph_support/algorithms/katz_centrality.md b/docs/cugraph/source/graph_support/algorithms/katz_centrality.md index 3bfe4d40c05..69b5d6b27b9 100644 --- a/docs/cugraph/source/graph_support/algorithms/katz_centrality.md +++ b/docs/cugraph/source/graph_support/algorithms/katz_centrality.md @@ -25,4 +25,4 @@ Copyright (c) 2023, NVIDIA CORPORATION. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -___ \ No newline at end of file +___ diff --git a/docs/cugraph/source/graph_support/algorithms/overlap_similarity.md b/docs/cugraph/source/graph_support/algorithms/overlap_similarity.md index 026d713fbd6..d9f9f681ea2 100644 --- a/docs/cugraph/source/graph_support/algorithms/overlap_similarity.md +++ b/docs/cugraph/source/graph_support/algorithms/overlap_similarity.md @@ -15,7 +15,7 @@ $overlap(A,B) = \frac{|A \cap B|}{min(|A|,|B|)}$ ## When not to use Overlap Similarity * You are trying to compare graphs of extremely different sizes * In overly sparse or dense graph can overlap similarity can miss relationships or give fals positives respectively. -* In directed graphs, there are better algorithms to use. +* In directed graphs, there are better algorithms to use. ## How computationally expensive is it? @@ -29,4 +29,4 @@ Copyright (c) 2023, NVIDIA CORPORATION. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -___ \ No newline at end of file +___ diff --git a/docs/cugraph/source/graph_support/algorithms/sorensen_coefficient.md b/docs/cugraph/source/graph_support/algorithms/sorensen_coefficient.md index 01b6709de5b..67d981815cf 100644 --- a/docs/cugraph/source/graph_support/algorithms/sorensen_coefficient.md +++ b/docs/cugraph/source/graph_support/algorithms/sorensen_coefficient.md @@ -23,7 +23,7 @@ In graphs, the sets refer to the set of connected nodes or neighborhood of nodes ## How computationally expensive is it? While cuGraph's parallelism mitigates run cost, [Big O notation](https://en.wikipedia.org/wiki/Big_O_notation) is still the standard to compare algorithm execution time. -The cost to run O(n * m) where n is the number of nodes in the graph and m is the number of groups to test. +The cost to run O(n * m) where n is the number of nodes in the graph and m is the number of groups to test. ___ Copyright (c) 2023, NVIDIA CORPORATION. @@ -31,4 +31,4 @@ Copyright (c) 2023, NVIDIA CORPORATION. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. -___ \ No newline at end of file +___ diff --git a/docs/cugraph/source/graph_support/compatibility.rst b/docs/cugraph/source/graph_support/compatibility.rst index be0cc21c2dd..ddb3f1d5fe5 100644 --- a/docs/cugraph/source/graph_support/compatibility.rst +++ b/docs/cugraph/source/graph_support/compatibility.rst @@ -5,4 +5,4 @@ Compatibility .. toctree:: :maxdepth: 3 -Compatibility document coming soon \ No newline at end of file +Compatibility document coming soon diff --git a/docs/cugraph/source/graph_support/cugraph_service.rst b/docs/cugraph/source/graph_support/cugraph_service.rst index 620b22d1e0b..658f9e731df 100644 --- a/docs/cugraph/source/graph_support/cugraph_service.rst +++ b/docs/cugraph/source/graph_support/cugraph_service.rst @@ -5,5 +5,5 @@ CuGraph Service .. toctree:: :maxdepth: 3 - -Cugraph Service for remote access to a server-based cuGraph(https://github.com/rapidsai/cugraph/blob/branch-23.04/python/cugraph-service/README.md) \ No newline at end of file + +Cugraph Service for remote access to a server-based cuGraph(https://github.com/rapidsai/cugraph/blob/branch-23.04/python/cugraph-service/README.md) diff --git a/docs/cugraph/source/graph_support/cugraphops_support.rst b/docs/cugraph/source/graph_support/cugraphops_support.rst index 08ae3b218c7..fd79564f849 100644 --- a/docs/cugraph/source/graph_support/cugraphops_support.rst +++ b/docs/cugraph/source/graph_support/cugraphops_support.rst @@ -7,4 +7,4 @@ cugraph-ops aims to be a low-level, framework agnostic library providing commonl .. 
toctree:: :maxdepth: 3 - https://github.com/rapidsai/cugraph-ops/blob/branch-23.04/README.md \ No newline at end of file + https://github.com/rapidsai/cugraph-ops/blob/branch-23.04/README.md diff --git a/docs/cugraph/source/graph_support/datastores.rst b/docs/cugraph/source/graph_support/datastores.rst index f921100774c..50d8461e7fe 100644 --- a/docs/cugraph/source/graph_support/datastores.rst +++ b/docs/cugraph/source/graph_support/datastores.rst @@ -6,4 +6,3 @@ Data Stores property_graph.md knowledge_stores.md feature_stores.md - diff --git a/docs/cugraph/source/graph_support/feature_stores.md b/docs/cugraph/source/graph_support/feature_stores.md index f40cab72ee0..ef9358c4cf8 100644 --- a/docs/cugraph/source/graph_support/feature_stores.md +++ b/docs/cugraph/source/graph_support/feature_stores.md @@ -1,3 +1,3 @@ # Feature Store -Coming Soon \ No newline at end of file +Coming Soon diff --git a/docs/cugraph/source/graph_support/gnn_support.rst b/docs/cugraph/source/graph_support/gnn_support.rst index 3c92dc36098..639b657c64d 100644 --- a/docs/cugraph/source/graph_support/gnn_support.rst +++ b/docs/cugraph/source/graph_support/gnn_support.rst @@ -5,8 +5,8 @@ Graph Neural Network Support .. toctree:: :maxdepth: 2 - + PyG_support.md DGL_support.md cugraphops_support.rst - wholegraph_support.md \ No newline at end of file + wholegraph_support.md diff --git a/docs/cugraph/source/graph_support/graph_algorithms.rst b/docs/cugraph/source/graph_support/graph_algorithms.rst index a8ba01aa915..38dd8ccc25b 100644 --- a/docs/cugraph/source/graph_support/graph_algorithms.rst +++ b/docs/cugraph/source/graph_support/graph_algorithms.rst @@ -4,5 +4,5 @@ Algorithms .. 
toctree:: :maxdepth: 3 - - algorithms.md \ No newline at end of file + + algorithms.md diff --git a/docs/cugraph/source/graph_support/index.rst b/docs/cugraph/source/graph_support/index.rst index 9526fae7eb2..67aba74288b 100644 --- a/docs/cugraph/source/graph_support/index.rst +++ b/docs/cugraph/source/graph_support/index.rst @@ -10,4 +10,4 @@ Graph Support compatibility.rst gnn_support.rst datastores.rst - cugraph_service.rst \ No newline at end of file + cugraph_service.rst diff --git a/docs/cugraph/source/graph_support/knowledge_stores.md b/docs/cugraph/source/graph_support/knowledge_stores.md index 4d6028a598c..1749eb2b4c1 100644 --- a/docs/cugraph/source/graph_support/knowledge_stores.md +++ b/docs/cugraph/source/graph_support/knowledge_stores.md @@ -1,3 +1,3 @@ # Knowledge Store -Coming Soon \ No newline at end of file +Coming Soon diff --git a/docs/cugraph/source/graph_support/property_graph.md b/docs/cugraph/source/graph_support/property_graph.md index 614910b79bc..ef07be79ba0 100644 --- a/docs/cugraph/source/graph_support/property_graph.md +++ b/docs/cugraph/source/graph_support/property_graph.md @@ -16,8 +16,8 @@ Property Graph enables: This is an example of using the cuGraph Property Graph in a two stage analysis. ``` -import cudf -import cugraph +import cudf +import cugraph from cugraph.experimental import PropertyGraph # Import a built-in dataset @@ -29,22 +29,22 @@ graph = cugraph.Graph(directed=False) G = karate.get_graph(create_using=graph,fetch=True) df = G.edgelist.edgelist_df -pG = PropertyGraph() +pG = PropertyGraph() pG. add_edge_data(df, vertex_col_names=("src", "dst")) -# Run Louvain to get the partition number for each vertex. -# Set resolution accordingly to identify two primary partitions. +# Run Louvain to get the partition number for each vertex. +# Set resolution accordingly to identify two primary partitions. 
(partition_info, _) = cugraph.louvain(pG.extract_subgraph(create_using=graph), resolution=0.6) -# Add the partition numbers back to the Property Graph as vertex properties +# Add the partition numbers back to the Property Graph as vertex properties pG.add_vertex_data(partition_info, vertex_col_name="vertex") -# Use the partition properties to extract a Graph for each partition. +# Use the partition properties to extract a Graph for each partition. G0 = pG.extract_subgraph(selection=pG.select_vertices("partition == 0")) G1 = pG.extract_subgraph(selection=pG. select_vertices("partition == 1")) -# Run pagerank on each graph, print results. -pageranks0 = cugraph.pagerank(G0) -pageranks1 = cugraph.pagerank(G1) +# Run pagerank on each graph, print results. +pageranks0 = cugraph.pagerank(G0) +pageranks1 = cugraph.pagerank(G1) print(pageranks0.sort_values (by="pagerank", ascending=False).head(3)) print(pageranks1.sort_values (by="pagerank", ascending=False).head(3)) -``` \ No newline at end of file +``` diff --git a/docs/cugraph/source/graph_support/wholegraph_support.md b/docs/cugraph/source/graph_support/wholegraph_support.md index fa26700a648..d1c5eaf7254 100644 --- a/docs/cugraph/source/graph_support/wholegraph_support.md +++ b/docs/cugraph/source/graph_support/wholegraph_support.md @@ -1,4 +1,4 @@ # WholeGraph [RAPIDS](https://rapids.ai) [WholeGraph](https://github.com/rapidsai/wholegraph) is designed to help train large-scale Graph Neural Networks(GNN). 
-Please see [WholeGraph Introduction](https://github.com/rapidsai/wholegraph/blob/main/README.md) for more details \ No newline at end of file +Please see [WholeGraph Introduction](https://github.com/rapidsai/wholegraph/blob/main/README.md) for more details diff --git a/docs/cugraph/source/index.rst b/docs/cugraph/source/index.rst index 955eb6d54db..b18a79d3396 100644 --- a/docs/cugraph/source/index.rst +++ b/docs/cugraph/source/index.rst @@ -18,7 +18,7 @@ RAPIDS Graph documentation - :abbr:`cugraph-service (Graph-as-a-service provides both Client and Server packages)` * - :abbr:`pylibcugraph (light-weight Python wrapper with no guard rails)` - :abbr:`cugraph-dgl (Accelerated extensions for use with the DGL framework)` - - + - * - :abbr:`libcugraph (C++ API)` - :abbr:`cugraph-pyg (Accelerated extensions for use with the PyG framework)` - @@ -33,7 +33,7 @@ Introduction ~~~~~~~~~~~~ cuGraph is a library of graph algorithms that seamlessly integrates into the RAPIDS data science ecosystem and allows the data scientist to easily call -graph algorithms using data stored in GPU DataFrames, NetworkX Graphs, or +graph algorithms using data stored in GPU DataFrames, NetworkX Graphs, or even CuPy or SciPy sparse Matrices. Note: We are redoing all of our documents, please be patient as we update diff --git a/docs/cugraph/source/installation/getting_cugraph.md b/docs/cugraph/source/installation/getting_cugraph.md index d9d3fa55c9a..4d601bf3217 100644 --- a/docs/cugraph/source/installation/getting_cugraph.md +++ b/docs/cugraph/source/installation/getting_cugraph.md @@ -1,7 +1,7 @@ # Getting cuGraph Packages -Start by reading the [RAPIDS Instalation guide](https://docs.rapids.ai/install) +Start by reading the [RAPIDS Installation guide](https://docs.rapids.ai/install) and checkout the [RAPIDS install selector](https://rapids.ai/start.html) for a pick list of install options.
@@ -45,7 +45,7 @@ conda install -c rapidsai -c conda-forge -c nvidia cugraph cuda-version=12.0 Alternatively, use `cuda-version=11.8` for packages supporting CUDA 11. -Note: This conda installation only applies to Linux and Python versions 3.9/3.10. +Note: This conda installation only applies to Linux and Python versions 3.9/3.10/3.11.
@@ -65,4 +65,3 @@ Also available: * nx-cugraph-cu12
- diff --git a/docs/cugraph/source/installation/index.rst b/docs/cugraph/source/installation/index.rst index 27858b77012..8ad12c5895b 100644 --- a/docs/cugraph/source/installation/index.rst +++ b/docs/cugraph/source/installation/index.rst @@ -5,6 +5,6 @@ Installation .. toctree:: :maxdepth: 3 - + getting_cugraph source_build diff --git a/docs/cugraph/source/references/cugraph_ref.md b/docs/cugraph/source/references/cugraph_ref.md index a646d6da243..845436a60f2 100644 --- a/docs/cugraph/source/references/cugraph_ref.md +++ b/docs/cugraph/source/references/cugraph_ref.md @@ -43,4 +43,3 @@

- diff --git a/docs/cugraph/source/references/datasets.md b/docs/cugraph/source/references/datasets.md index 3d45dec188a..35234de87c6 100644 --- a/docs/cugraph/source/references/datasets.md +++ b/docs/cugraph/source/references/datasets.md @@ -4,18 +4,18 @@ karate - W. W. Zachary, *An information flow model for conflict and fission in small groups*, Journal of Anthropological Research 33, 452-473 (1977). dolphins - D. Lusseau, K. Schneider, O. J. Boisseau, P. Haase, E. Slooten, and S. M. Dawson, - *The bottlenose dolphin community of Doubtful Sound features a large proportion of long-lasting associations*, + *The bottlenose dolphin community of Doubtful Sound features a large proportion of long-lasting associations*, Behavioral Ecology and Sociobiology 54, 396-405 (2003). netscience - M. E. J. Newman, - *Finding community structure in networks using the eigenvectors of matrices*, + *Finding community structure in networks using the eigenvectors of matrices*, Preprint physics/0605087 (2006). email-Eu-core - Hao Yin, Austin R. Benson, Jure Leskovec, and David F. Gleich. - *Local Higher-order Graph Clustering.* + *Local Higher-order Graph Clustering.* In Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. 2017. - - J. Leskovec, J. Kleinberg and C. Faloutsos. - *Graph Evolution: Densification and Shrinking Diameters*. - ACM Transactions on Knowledge Discovery from Data (ACM TKDD), 1(1), 2007. http://www.cs.cmu.edu/~jure/pubs/powergrowth-tkdd.pdf + - J. Leskovec, J. Kleinberg and C. Faloutsos. + *Graph Evolution: Densification and Shrinking Diameters*. + ACM Transactions on Knowledge Discovery from Data (ACM TKDD), 1(1), 2007. http://www.cs.cmu.edu/~jure/pubs/powergrowth-tkdd.pdf polbooks - - V. Krebs, unpublished, http://www.orgnet.com/. \ No newline at end of file + - V. Krebs, unpublished, http://www.orgnet.com/. 
diff --git a/docs/cugraph/source/references/index.rst b/docs/cugraph/source/references/index.rst index d7a173a8330..9ea51a08356 100644 --- a/docs/cugraph/source/references/index.rst +++ b/docs/cugraph/source/references/index.rst @@ -5,7 +5,7 @@ References .. toctree:: :maxdepth: 3 - + cugraph_ref.md datasets.md licenses.md diff --git a/docs/cugraph/source/references/licenses.md b/docs/cugraph/source/references/licenses.md index dfc950023cf..b95905d9f2f 100644 --- a/docs/cugraph/source/references/licenses.md +++ b/docs/cugraph/source/references/licenses.md @@ -1,7 +1,7 @@ # License Most of the Graph code is open-sourced and developed under the Apache 2.0 licnese. -The cugraph-ops code is closed sourced and developed under a NVIDIA copyright +The cugraph-ops code is closed source and developed under an NVIDIA copyright diff --git a/docs/cugraph/source/releases/index.rst b/docs/cugraph/source/releases/index.rst index 7bd4f6dfa31..cbd22324b9b 100644 --- a/docs/cugraph/source/releases/index.rst +++ b/docs/cugraph/source/releases/index.rst @@ -2,4 +2,4 @@ Releases ======== -https://github.com/rapidsai/cugraph/blob/main/CHANGELOG.md \ No newline at end of file +https://github.com/rapidsai/cugraph/blob/main/CHANGELOG.md diff --git a/docs/cugraph/source/tutorials/community_resources.md b/docs/cugraph/source/tutorials/community_resources.md index 572f85a015b..1c4362393d1 100644 --- a/docs/cugraph/source/tutorials/community_resources.md +++ b/docs/cugraph/source/tutorials/community_resources.md @@ -1,2 +1,2 @@ # Commmunity Resources -[Rapids Community Repository](https://github.com/rapidsai-community/notebooks-contrib) \ No newline at end of file +[Rapids Community Repository](https://github.com/rapidsai-community/notebooks-contrib) diff --git a/docs/cugraph/source/tutorials/cugraph_blogs.rst b/docs/cugraph/source/tutorials/cugraph_blogs.rst index 368dbcce4f8..373e846f6c3 100644 --- a/docs/cugraph/source/tutorials/cugraph_blogs.rst +++
b/docs/cugraph/source/tutorials/cugraph_blogs.rst @@ -3,7 +3,7 @@ cuGraph Blogs and Presentations ************************************************ The RAPIDS team blogs at https://medium.com/rapids-ai, and many of -these blog posts provide deeper dives into features from cuGraph. +these blog posts provide deeper dives into features from cuGraph. Here, we've selected just a few that are of particular interest to cuGraph users: diff --git a/docs/cugraph/source/tutorials/cugraph_notebooks.md b/docs/cugraph/source/tutorials/cugraph_notebooks.md index d4251d4c9b2..559ba36e97e 100644 --- a/docs/cugraph/source/tutorials/cugraph_notebooks.md +++ b/docs/cugraph/source/tutorials/cugraph_notebooks.md @@ -53,7 +53,7 @@ Running the example in these notebooks requires: * The latest version of RAPIDS with cuGraph. * Download via Docker, Conda (See [__Getting Started__](https://rapids.ai/start.html)) - + * cuGraph is dependent on the latest version of cuDF. Please install all components of RAPIDS * Python 3.8+ * A system with an NVIDIA GPU: Pascal architecture or better @@ -66,7 +66,7 @@ Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
diff --git a/docs/cugraph/source/tutorials/how_to_guides.md b/docs/cugraph/source/tutorials/how_to_guides.md index 42da6ed21ca..80be5b4ab5b 100644 --- a/docs/cugraph/source/tutorials/how_to_guides.md +++ b/docs/cugraph/source/tutorials/how_to_guides.md @@ -1,9 +1,9 @@ # How To Guides -- Basic use of cuGraph, on the page -- Property graph with analytic flow -- GNN – model building -- cuGraph Service – client/server setup and use (ucx) -- MNMG Graph – dask, rmm basics and examples -- Pylibcugraph – why and how -- Cugraph for C, C++ users -- Use of nvidia-smi with cugraph \ No newline at end of file +- Basic use of cuGraph, on the page +- Property graph with analytic flow +- GNN – model building +- cuGraph Service – client/server setup and use (ucx) +- MNMG Graph – dask, rmm basics and examples +- Pylibcugraph – why and how +- Cugraph for C, C++ users +- Use of nvidia-smi with cugraph diff --git a/docs/cugraph/source/wholegraph/basics/wholememory_implementation_details.md b/docs/cugraph/source/wholegraph/basics/wholememory_implementation_details.md index a5541109c4f..634539cd27e 100644 --- a/docs/cugraph/source/wholegraph/basics/wholememory_implementation_details.md +++ b/docs/cugraph/source/wholegraph/basics/wholememory_implementation_details.md @@ -18,7 +18,7 @@ partition data into these GPU devices. The partition method guarantees that each GPU can access one continuous part of the entire memory. Here "can access" means can directly access from CUDA kernels, but the memory doesn't have to be physically on that GPU. For example,it can be on host memory or other GPU's device memory that can be access using P2P. -In that case the stored data has its own granularity that shouldn't be split. Data granularity can be specified while +In that case the stored data has its own granularity that shouldn't be split. Data granularity can be specified while creating WholeMemory. Then each data granularity can be considered as a block of data. 
The follow figure shows the layout of 15 data block over 4 GPUs. diff --git a/docs/cugraph/source/wholegraph/index.rst b/docs/cugraph/source/wholegraph/index.rst index 2a69544b4c9..bb2281b1351 100644 --- a/docs/cugraph/source/wholegraph/index.rst +++ b/docs/cugraph/source/wholegraph/index.rst @@ -11,4 +11,3 @@ RAPIDS WholeGraph has following package: basics/index installation/index - diff --git a/docs/cugraph/source/wholegraph/installation/getting_wholegraph.md b/docs/cugraph/source/wholegraph/installation/getting_wholegraph.md index 5b2072b0523..57314dcd426 100644 --- a/docs/cugraph/source/wholegraph/installation/getting_wholegraph.md +++ b/docs/cugraph/source/wholegraph/installation/getting_wholegraph.md @@ -1,7 +1,7 @@ # Getting the WholeGraph Packages -Start by reading the [RAPIDS Instalation guide](https://docs.rapids.ai/install) +Start by reading the [RAPIDS Instalation guide](https://docs.rapids.ai/install) and checkout the [RAPIDS install selector](https://rapids.ai/start.html) for a pick list of install options. diff --git a/notebooks/README.md b/notebooks/README.md index 3f1cdbaf2a1..31660b21569 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -54,7 +54,7 @@ Running the example in these notebooks requires: * The latest version of RAPIDS with cuGraph. * Download via Docker, Conda (See [__Getting Started__](https://rapids.ai/start.html)) - + * cuGraph is dependent on the latest version of cuDF. Please install all components of RAPIDS * Python 3.8+ * A system with an NVIDIA GPU: Pascal architecture or better @@ -124,7 +124,7 @@ Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. @@ -133,4 +133,3 @@ Unless required by applicable law or agreed to in writing, software distributed ![RAPIDS](img/rapids_logo.png) - diff --git a/notebooks/algorithms/README.md b/notebooks/algorithms/README.md index 7965970c7d9..2aa35b52b0f 100644 --- a/notebooks/algorithms/README.md +++ b/notebooks/algorithms/README.md @@ -58,7 +58,7 @@ Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. diff --git a/notebooks/algorithms/centrality/README.md b/notebooks/algorithms/centrality/README.md index f15ed7bdc04..be838b1c76a 100644 --- a/notebooks/algorithms/centrality/README.md +++ b/notebooks/algorithms/centrality/README.md @@ -34,7 +34,7 @@ Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. diff --git a/notebooks/algorithms/community/README.md b/notebooks/algorithms/community/README.md index 748f27c0ffe..222dab4ffe3 100644 --- a/notebooks/algorithms/community/README.md +++ b/notebooks/algorithms/community/README.md @@ -36,7 +36,7 @@ Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. diff --git a/notebooks/algorithms/components/README.md b/notebooks/algorithms/components/README.md index e14e886a7e2..f3c6f925e36 100644 --- a/notebooks/algorithms/components/README.md +++ b/notebooks/algorithms/components/README.md @@ -27,7 +27,7 @@ Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. diff --git a/notebooks/algorithms/cores/README.md b/notebooks/algorithms/cores/README.md index e36677ffe32..0e2b1e56702 100644 --- a/notebooks/algorithms/cores/README.md +++ b/notebooks/algorithms/cores/README.md @@ -27,7 +27,7 @@ Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. diff --git a/notebooks/algorithms/link_analysis/README.md b/notebooks/algorithms/link_analysis/README.md index 17d4b8ab6d1..3ed23e02a62 100644 --- a/notebooks/algorithms/link_analysis/README.md +++ b/notebooks/algorithms/link_analysis/README.md @@ -4,7 +4,7 @@ cuGraph Link Analysis notebooks contain Jupyter Notebooks that demonstrate algorithms to rank the importance of individual vertices within the graph. A good comparison of the two algorithms can be found [here](https://www.ijert.org/research/comparative-analysis-of-pagerank-and-hits-algorithms-IJERTV1IS8530.pdf). -In general, Pagerank is more popular because it is more efficient and takes less query time. 
+In general, Pagerank is more popular because it is more efficient and takes less query time. Manipulation of the data before or after the graph analytic is not covered here. Extended, more problem focused, notebooks are being created and available https://github.com/rapidsai/notebooks-extended @@ -29,7 +29,7 @@ Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. diff --git a/notebooks/algorithms/link_prediction/README.md b/notebooks/algorithms/link_prediction/README.md index e3391e4d0cf..4030a2880f7 100644 --- a/notebooks/algorithms/link_prediction/README.md +++ b/notebooks/algorithms/link_prediction/README.md @@ -1,7 +1,7 @@ # Vertex Similarity ---- -In this folder we will explore and compare the various vertex similarity metrics available in cuGraph. [Vertex similarity](https://en.wikipedia.org/wiki/Similarity_(network_science)), as the name implies, is a measure how similar two vertices are. +In this folder we will explore and compare the various vertex similarity metrics available in cuGraph. [Vertex similarity](https://en.wikipedia.org/wiki/Similarity_(network_science)), as the name implies, is a measure how similar two vertices are. |Algorithm |Notebooks Containing |Description | | --------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | @@ -34,7 +34,7 @@ Copyright (c) 2019-2022, NVIDIA CORPORATION. 
All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. diff --git a/notebooks/algorithms/sampling/README.md b/notebooks/algorithms/sampling/README.md index 9c60adcb9e3..6b7c5b82ba6 100644 --- a/notebooks/algorithms/sampling/README.md +++ b/notebooks/algorithms/sampling/README.md @@ -29,7 +29,7 @@ Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. diff --git a/notebooks/algorithms/structure/README.md b/notebooks/algorithms/structure/README.md index b209ddba71c..ea649037089 100644 --- a/notebooks/algorithms/structure/README.md +++ b/notebooks/algorithms/structure/README.md @@ -27,7 +27,7 @@ Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. diff --git a/notebooks/algorithms/traversal/README.md b/notebooks/algorithms/traversal/README.md index 4d4f37a0f8c..c8478831f66 100644 --- a/notebooks/algorithms/traversal/README.md +++ b/notebooks/algorithms/traversal/README.md @@ -25,7 +25,7 @@ Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. diff --git a/notebooks/modules/README.md b/notebooks/modules/README.md index 29cb47784c7..0cf7ccb61e7 100644 --- a/notebooks/modules/README.md +++ b/notebooks/modules/README.md @@ -1,3 +1,3 @@ # This folde contains partial notebooks -The code here is meant to be used to build other notebooks \ No newline at end of file +The code here is meant to be used to build other notebooks diff --git a/print_env.sh b/print_env.sh index 6f2d33b0eb8..43abd1c7ea7 100644 --- a/print_env.sh +++ b/print_env.sh @@ -1,7 +1,8 @@ #!/usr/bin/env bash +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Reports relevant environment information useful for diagnosing and # debugging cuGraph issues. 
-# Usage: +# Usage: # "./print_env.sh" - prints to stdout # "./print_env.sh > env.txt" - prints to file "env.txt" @@ -14,16 +15,16 @@ git submodule status --recursive else echo "Not inside a git repository" fi -echo +echo echo "***OS Information***" cat /etc/*-release uname -a -echo +echo echo "***GPU Information***" nvidia-smi -echo +echo echo "***CPU***" lscpu @@ -31,15 +32,15 @@ echo echo "***CMake***" which cmake && cmake --version -echo +echo echo "***g++***" which g++ && g++ --version -echo +echo echo "***nvcc***" which nvcc && nvcc --version -echo +echo echo "***Python***" which python && python -c "import sys; print('Python {0}.{1}.{2}'.format(sys.version_info[0], sys.version_info[1], sys.version_info[2]))" diff --git a/python/cugraph-dgl/README.md b/python/cugraph-dgl/README.md index 6de476ae500..ac4cb2f6253 100644 --- a/python/cugraph-dgl/README.md +++ b/python/cugraph-dgl/README.md @@ -2,7 +2,7 @@ ## Description -[RAPIDS](https://rapids.ai) cugraph_dgl provides a duck-typed version of the [DGLGraph](https://docs.dgl.ai/api/python/dgl.DGLGraph.html#dgl.DGLGraph) class, which uses cugraph for storing graph structure and node/edge feature data. Using cugraph as the backend allows DGL users to access a collection of GPU accelerated algorithms for graph analytics, such as centrality computation and community detection. +[RAPIDS](https://rapids.ai) cugraph_dgl provides a duck-typed version of the [DGLGraph](https://docs.dgl.ai/api/python/dgl.DGLGraph.html#dgl.DGLGraph) class, which uses cugraph for storing graph structure and node/edge feature data. Using cugraph as the backend allows DGL users to access a collection of GPU accelerated algorithms for graph analytics, such as centrality computation and community detection. ## Conda @@ -22,7 +22,7 @@ mamba env create -n cugraph_dgl_dev --file conda/cugraph_dgl_dev_11.6.yml ### Install in editable mode ``` -pip install -e . +pip install -e . 
``` ### Run tests @@ -42,15 +42,13 @@ sampler = dgl.dataloading.NeighborSampler( [15, 10, 5], prefetch_node_feats=['feat'], prefetch_labels=['label']) train_dataloader = dgl.dataloading.DataLoader( -- dgl_g, +- dgl_g, + cugraph_g, -train_idx, -sampler, -device=device, +train_idx, +sampler, +device=device, batch_size=1024, shuffle=True, -drop_last=False, +drop_last=False, num_workers=0) ``` - - diff --git a/python/cugraph/cugraph/community/egonet.py b/python/cugraph/cugraph/community/egonet.py index 01bbc41d8cd..b7341ca3bae 100644 --- a/python/cugraph/cugraph/community/egonet.py +++ b/python/cugraph/cugraph/community/egonet.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,18 +11,18 @@ # See the License for the specific language governing permissions and # limitations under the License. + +import warnings + +import cudf from cugraph.utilities import ( ensure_cugraph_obj, is_nx_graph_type, ) from cugraph.utilities import cugraph_to_nx -import cudf - from pylibcugraph import ego_graph as pylibcugraph_ego_graph - from pylibcugraph import ResourceHandle -import warnings def _convert_graph_to_output_type(G, input_type): @@ -49,6 +49,7 @@ def _convert_df_series_to_output_type(df, offsets, input_type): return df, offsets +# TODO: add support for a 'batch-mode' option. 
def ego_graph(G, n, radius=1, center=True, undirected=None, distance=None): """ Compute the induced subgraph of neighbors centered at node n, @@ -118,6 +119,7 @@ def ego_graph(G, n, radius=1, center=True, undirected=None, distance=None): # Match the seed to the vertex dtype n_type = G.edgelist.edgelist_df["src"].dtype + # FIXME: 'n' should represent a single vertex, but is not being verified n = n.astype(n_type) do_expensive_check = False @@ -154,6 +156,11 @@ def ego_graph(G, n, radius=1, center=True, undirected=None, distance=None): def batched_ego_graphs(G, seeds, radius=1, center=True, undirected=None, distance=None): """ + This function is deprecated. + + Deprecated since 24.04. Batched support for multiple seeds will be added + to `ego_graph`. + Compute the induced subgraph of neighbors for each node in seeds within a given radius. @@ -196,6 +203,9 @@ def batched_ego_graphs(G, seeds, radius=1, center=True, undirected=None, distanc ... radius=2) """ + warning_msg = "This function is deprecated. Batched support for multiple vertices \ + will be added to `ego_graph`" + warnings.warn(warning_msg, DeprecationWarning) (G, input_type) = ensure_cugraph_obj(G, nx_weight_attr="weight") diff --git a/python/cugraph/cugraph/dask/community/egonet.py b/python/cugraph/cugraph/dask/community/egonet.py index e49d4777cef..e4282786653 100644 --- a/python/cugraph/cugraph/dask/community/egonet.py +++ b/python/cugraph/cugraph/dask/community/egonet.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -129,9 +129,9 @@ def ego_graph(input_graph, n, radius=1, center=True): # renumbered, the node ID must also be renumbered. 
if input_graph.renumbered: n = input_graph.lookup_internal_vertex_id(n) - n_type = input_graph.edgelist.edgelist_df.dtypes[0] + n_type = input_graph.edgelist.edgelist_df.dtypes.iloc[0] else: - n_type = input_graph.input_df.dtypes[0] + n_type = input_graph.input_df.dtypes.iloc[0] if isinstance(n, (cudf.Series, cudf.DataFrame)): n = dask_cudf.from_cudf(n, npartitions=min(input_graph._npartitions, len(n))) diff --git a/python/cugraph/cugraph/dask/structure/replication.pyx b/python/cugraph/cugraph/dask/structure/replication.pyx index 64f43663517..04e14cb012f 100644 --- a/python/cugraph/cugraph/dask/structure/replication.pyx +++ b/python/cugraph/cugraph/dask/structure/replication.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -141,4 +141,4 @@ cdef comms_bcast(uintptr_t handle, elif dtype == np.float64: c_utils.comms_bcast(( handle)[0], value_ptr, count) else: - raise TypeError("Unsupported broadcast type") \ No newline at end of file + raise TypeError("Unsupported broadcast type") diff --git a/python/cugraph/cugraph/datasets/metadata/cit-patents.yaml b/python/cugraph/cugraph/datasets/metadata/cit-patents.yaml index d5c4cf195bd..37d288e6339 100644 --- a/python/cugraph/cugraph/datasets/metadata/cit-patents.yaml +++ b/python/cugraph/cugraph/datasets/metadata/cit-patents.yaml @@ -3,7 +3,7 @@ file_type: .csv description: A citation graph that includes all citations made by patents granted between 1975 and 1999, totaling 16,522,438 citations. author: NBER refs: - J. Leskovec, J. Kleinberg and C. Faloutsos. Graphs over Time Densification Laws, Shrinking Diameters and Possible Explanations. + J. Leskovec, J. Kleinberg and C. Faloutsos. Graphs over Time Densification Laws, Shrinking Diameters and Possible Explanations. 
ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD), 2005. delim: " " header: None @@ -19,4 +19,4 @@ is_multigraph: false is_symmetric: false number_of_edges: 16518948 number_of_nodes: 3774768 -url: https://data.rapids.ai/cugraph/datasets/cit-Patents.csv \ No newline at end of file +url: https://data.rapids.ai/cugraph/datasets/cit-Patents.csv diff --git a/python/cugraph/cugraph/datasets/metadata/dining_prefs.yaml b/python/cugraph/cugraph/datasets/metadata/dining_prefs.yaml index e7ec85d7a1f..3f70a886b59 100644 --- a/python/cugraph/cugraph/datasets/metadata/dining_prefs.yaml +++ b/python/cugraph/cugraph/datasets/metadata/dining_prefs.yaml @@ -20,4 +20,4 @@ is_multigraph: false is_symmetric: true number_of_edges: 42 number_of_nodes: 26 -url: https://data.rapids.ai/cugraph/datasets/dining_prefs.csv \ No newline at end of file +url: https://data.rapids.ai/cugraph/datasets/dining_prefs.csv diff --git a/python/cugraph/cugraph/datasets/metadata/dolphins.yaml b/python/cugraph/cugraph/datasets/metadata/dolphins.yaml index bc7cb6cd486..70c178bc5a2 100644 --- a/python/cugraph/cugraph/datasets/metadata/dolphins.yaml +++ b/python/cugraph/cugraph/datasets/metadata/dolphins.yaml @@ -1,6 +1,6 @@ name: dolphins file_type: .csv -description: An undirected social network of frequent associations between 62 dolphins in a community living off Doubtful Sound, New Zealand, as compiled by Lusseau et al. (2003). +description: An undirected social network of frequent associations between 62 dolphins in a community living off Doubtful Sound, New Zealand, as compiled by Lusseau et al. (2003). author: - D. Lusseau - K. 
Schneider diff --git a/python/cugraph/cugraph/datasets/metadata/europe_osm.yaml b/python/cugraph/cugraph/datasets/metadata/europe_osm.yaml index fe0e42a4b86..1443f8d5cdd 100644 --- a/python/cugraph/cugraph/datasets/metadata/europe_osm.yaml +++ b/python/cugraph/cugraph/datasets/metadata/europe_osm.yaml @@ -18,4 +18,4 @@ is_multigraph: false is_symmetric: true number_of_edges: 54054660 number_of_nodes: 50912018 -url: https://data.rapids.ai/cugraph/datasets/europe_osm.csv \ No newline at end of file +url: https://data.rapids.ai/cugraph/datasets/europe_osm.csv diff --git a/python/cugraph/cugraph/datasets/metadata/hollywood.yaml b/python/cugraph/cugraph/datasets/metadata/hollywood.yaml index 2f09cf7679b..9a8ef56532b 100644 --- a/python/cugraph/cugraph/datasets/metadata/hollywood.yaml +++ b/python/cugraph/cugraph/datasets/metadata/hollywood.yaml @@ -23,4 +23,4 @@ is_multigraph: false is_symmetric: true number_of_edges: 57515616 number_of_nodes: 1139905 -url: https://data.rapids.ai/cugraph/datasets/hollywood.csv \ No newline at end of file +url: https://data.rapids.ai/cugraph/datasets/hollywood.csv diff --git a/python/cugraph/cugraph/datasets/metadata/soc-livejournal1.yaml b/python/cugraph/cugraph/datasets/metadata/soc-livejournal1.yaml index fafc68acb9b..af0cbd4f047 100644 --- a/python/cugraph/cugraph/datasets/metadata/soc-livejournal1.yaml +++ b/python/cugraph/cugraph/datasets/metadata/soc-livejournal1.yaml @@ -3,7 +3,7 @@ file_type: .csv description: A graph of the LiveJournal social network. author: L. Backstrom, D. Huttenlocher, J. Kleinberg, X. Lan refs: - L. Backstrom, D. Huttenlocher, J. Kleinberg, X. Lan. Group Formation in + L. Backstrom, D. Huttenlocher, J. Kleinberg, X. Lan. Group Formation in Large Social Networks Membership, Growth, and Evolution. KDD, 2006. 
delim: " " header: None @@ -19,4 +19,4 @@ is_multigraph: false is_symmetric: false number_of_edges: 68993773 number_of_nodes: 4847571 -url: https://data.rapids.ai/cugraph/datasets/soc-LiveJournal1.csv \ No newline at end of file +url: https://data.rapids.ai/cugraph/datasets/soc-LiveJournal1.csv diff --git a/python/cugraph/cugraph/datasets/metadata/soc-twitter-2010.yaml b/python/cugraph/cugraph/datasets/metadata/soc-twitter-2010.yaml index df5df5735af..184d69a8da1 100644 --- a/python/cugraph/cugraph/datasets/metadata/soc-twitter-2010.yaml +++ b/python/cugraph/cugraph/datasets/metadata/soc-twitter-2010.yaml @@ -3,8 +3,8 @@ file_type: .csv description: A network of follower relationships from a snapshot of Twitter in 2010, where an edge from i to j indicates that j is a follower of i. author: H. Kwak, C. Lee, H. Park, S. Moon refs: - J. Yang, J. Leskovec. Temporal Variation in Online Media. ACM Intl. - Conf. on Web Search and Data Mining (WSDM '11), 2011. + J. Yang, J. Leskovec. Temporal Variation in Online Media. ACM Intl. + Conf. on Web Search and Data Mining (WSDM '11), 2011. delim: " " header: None col_names: @@ -19,4 +19,4 @@ is_multigraph: false is_symmetric: false number_of_edges: 530051354 number_of_nodes: 21297772 -url: https://data.rapids.ai/cugraph/datasets/soc-twitter-2010.csv \ No newline at end of file +url: https://data.rapids.ai/cugraph/datasets/soc-twitter-2010.csv diff --git a/python/cugraph/cugraph/layout/force_atlas2.py b/python/cugraph/cugraph/layout/force_atlas2.py index 0e15eee718f..639801c9b59 100644 --- a/python/cugraph/cugraph/layout/force_atlas2.py +++ b/python/cugraph/cugraph/layout/force_atlas2.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -13,6 +13,7 @@ from cugraph.layout import force_atlas2_wrapper from cugraph.utilities import ensure_cugraph_obj_for_nx +import cudf def force_atlas2( @@ -55,8 +56,8 @@ def force_atlas2( Above 1000 iterations is discouraged. pos_list: cudf.DataFrame, optional (default=None) - Data frame with initial vertex positions containing two columns: - 'x' and 'y' positions. + Data frame with initial vertex positions containing three columns: + 'vertex', 'x' and 'y' positions. outbound_attraction_distribution: bool, optional (default=True) Distributes attraction along outbound edges. @@ -131,6 +132,10 @@ def on_train_end(self, positions): input_graph, isNx = ensure_cugraph_obj_for_nx(input_graph) if pos_list is not None: + if not isinstance(pos_list, cudf.DataFrame): + raise TypeError("pos_list should be a cudf.DataFrame") + if set(pos_list.columns) != set(["x", "y", "vertex"]): + raise ValueError("pos_list has wrong column names") if input_graph.renumbered is True: if input_graph.vertex_column_size() > 1: cols = pos_list.columns[:-2].to_list() diff --git a/python/cugraph/cugraph/structure/graph_classes.py b/python/cugraph/cugraph/structure/graph_classes.py index 03efcba0307..f48895c90b9 100644 --- a/python/cugraph/cugraph/structure/graph_classes.py +++ b/python/cugraph/cugraph/structure/graph_classes.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -426,7 +426,7 @@ def from_pandas_edgelist( ... 
edge_attr='2', renumber=False) """ - if not isinstance(pdf, pd.core.frame.DataFrame): + if not isinstance(pdf, pd.DataFrame): raise TypeError("pdf input is not a Pandas DataFrame") gdf = cudf.DataFrame.from_pandas(pdf) @@ -450,7 +450,7 @@ def from_pandas_adjacency(self, pdf): pdf : pandas.DataFrame A DataFrame that contains adjacency information """ - if not isinstance(pdf, pd.core.frame.DataFrame): + if not isinstance(pdf, pd.DataFrame): raise TypeError("pdf input is not a Pandas DataFrame") np_array = pdf.to_numpy() diff --git a/python/cugraph/cugraph/structure/graph_utilities.pxd b/python/cugraph/cugraph/structure/graph_utilities.pxd index 5085aa42216..39e2cdbbff5 100644 --- a/python/cugraph/cugraph/structure/graph_utilities.pxd +++ b/python/cugraph/cugraph/structure/graph_utilities.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -29,4 +29,3 @@ from pylibraft.common.handle cimport handle_t cdef extern from "" namespace "std" nogil: cdef device_buffer move(device_buffer) cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer]) - diff --git a/python/cugraph/cugraph/structure/hypergraph.py b/python/cugraph/cugraph/structure/hypergraph.py index 4add74d6061..add68cb6dac 100644 --- a/python/cugraph/cugraph/structure/hypergraph.py +++ b/python/cugraph/cugraph/structure/hypergraph.py @@ -185,7 +185,7 @@ def hypergraph( events.reset_index(drop=True, inplace=True) if EVENTID not in events.columns: - events[EVENTID] = cudf.core.index.RangeIndex(len(events)) + events[EVENTID] = cudf.RangeIndex(len(events)) events[EVENTID] = _prepend_str(events[EVENTID], EVENTID + DELIM) events[NODETYPE] = ( @@ -596,6 +596,4 @@ def _prepend_str(col, val): # Make an empty categorical string dtype def _empty_cat_dt(): - return cudf.core.dtypes.CategoricalDtype( - categories=np.array([], dtype="str"), ordered=False - ) + return cudf.CategoricalDtype(categories=np.array([], dtype="str"), ordered=False) diff --git a/python/cugraph/cugraph/tests/utils/test_dataset.py b/python/cugraph/cugraph/tests/utils/test_dataset.py index 39f7ed8850b..9331cefcfda 100644 --- a/python/cugraph/cugraph/tests/utils/test_dataset.py +++ b/python/cugraph/cugraph/tests/utils/test_dataset.py @@ -171,14 +171,14 @@ def test_reader(dataset): E = dataset.get_edgelist(download=True) assert E is not None - assert isinstance(E, cudf.core.dataframe.DataFrame) + assert isinstance(E, cudf.DataFrame) dataset.unload() # using pandas E_pd = dataset.get_edgelist(download=True, reader="pandas") assert E_pd is not None - assert isinstance(E_pd, pandas.core.frame.DataFrame) + assert isinstance(E_pd, pandas.DataFrame) dataset.unload() with pytest.raises(ValueError): diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/README.md b/python/pylibcugraph/pylibcugraph/_cugraph_c/README.md index cb0c6edff2b..7aa1b2e5680 
100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/README.md +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/README.md @@ -2,4 +2,4 @@ This directory contains cython `.pxd` files which describe the cugraph C library to cython. The contents here are simply a mapping of the cugraph_c C APIs to -cython for use in the cython code in the parent directory. \ No newline at end of file +cython for use in the cython code in the parent directory. diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd index 29c6d79e08d..b0e7ffaf82d 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -135,7 +135,7 @@ cdef extern from "cugraph_c/algorithms.h": cugraph_random_walk_result_get_path_sizes( cugraph_random_walk_result_t* result ) - + cdef size_t \ cugraph_random_walk_result_get_max_path_length( cugraph_random_walk_result_t* result @@ -187,17 +187,17 @@ cdef extern from "cugraph_c/algorithms.h": cugraph_sample_result_get_destinations( const cugraph_sample_result_t* result ) - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_sample_result_get_majors( const cugraph_sample_result_t* result ) - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_sample_result_get_minors( const cugraph_sample_result_t* result ) - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_sample_result_get_major_offsets( const cugraph_sample_result_t* result @@ -207,27 +207,27 @@ cdef extern from "cugraph_c/algorithms.h": cugraph_sample_result_get_index( const cugraph_sample_result_t* result ) - + cdef 
cugraph_type_erased_device_array_view_t* \ cugraph_sample_result_get_edge_weight( const cugraph_sample_result_t* result ) - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_sample_result_get_edge_id( const cugraph_sample_result_t* result ) - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_sample_result_get_edge_type( const cugraph_sample_result_t* result ) - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_sample_result_get_hop( const cugraph_sample_result_t* result ) - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_sample_result_get_label_hop_offsets( const cugraph_sample_result_t* result @@ -237,7 +237,7 @@ cdef extern from "cugraph_c/algorithms.h": cugraph_sample_result_get_start_labels( const cugraph_sample_result_t* result ) - + # Deprecated cdef cugraph_type_erased_device_array_view_t* \ cugraph_sample_result_get_offsets( @@ -264,15 +264,15 @@ cdef extern from "cugraph_c/algorithms.h": cugraph_sample_result_t** result, cugraph_error_t** error ) - + ctypedef struct cugraph_sampling_options_t: pass - + ctypedef enum cugraph_prior_sources_behavior_t: DEFAULT=0 CARRY_OVER EXCLUDE - + ctypedef enum cugraph_compression_type_t: COO=0 CSR @@ -285,7 +285,7 @@ cdef extern from "cugraph_c/algorithms.h": cugraph_sampling_options_t** options, cugraph_error_t** error, ) - + cdef void \ cugraph_sampling_set_renumber_results( cugraph_sampling_options_t* options, @@ -297,7 +297,7 @@ cdef extern from "cugraph_c/algorithms.h": cugraph_sampling_options_t* options, bool_t value, ) - + cdef void \ cugraph_sampling_set_return_hops( cugraph_sampling_options_t* options, @@ -315,19 +315,19 @@ cdef extern from "cugraph_c/algorithms.h": cugraph_sampling_options_t* options, bool_t value, ) - + cdef void \ cugraph_sampling_set_compress_per_hop( cugraph_sampling_options_t* options, bool_t value, ) - + cdef void \ cugraph_sampling_set_compression_type( cugraph_sampling_options_t* options, cugraph_compression_type_t value, ) - + cdef void \ 
cugraph_sampling_options_free( cugraph_sampling_options_t* options, @@ -343,7 +343,7 @@ cdef extern from "cugraph_c/algorithms.h": cugraph_random_walk_result_t** result, cugraph_error_t** error ) - + # biased random walks cdef cugraph_error_code_t \ cugraph_based_random_walks( diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/centrality_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/centrality_algorithms.pxd index 532df624c99..6e9a5432974 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/centrality_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/centrality_algorithms.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -212,7 +212,7 @@ cdef extern from "cugraph_c/centrality_algorithms.h": cugraph_centrality_result_t** result, cugraph_error_t** error ) - + ########################################################################### # edge betweenness centrality @@ -223,7 +223,7 @@ cdef extern from "cugraph_c/centrality_algorithms.h": cugraph_edge_centrality_result_get_src_vertices( cugraph_edge_centrality_result_t* result ) - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_edge_centrality_result_get_dst_vertices( cugraph_edge_centrality_result_t* result @@ -233,17 +233,17 @@ cdef extern from "cugraph_c/centrality_algorithms.h": cugraph_edge_centrality_result_get_edge_ids( cugraph_edge_centrality_result_t* result ) - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_edge_centrality_result_get_values( cugraph_edge_centrality_result_t* result ) - + cdef void \ cugraph_edge_centrality_result_free( cugraph_edge_centrality_result_t* result ) - + cdef cugraph_error_code_t \ cugraph_edge_betweenness_centrality( const cugraph_resource_handle_t* handle, diff --git 
a/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd index 3c273b7d3fa..0e9529146cc 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -48,17 +48,17 @@ cdef extern from "cugraph_c/community_algorithms.h": cugraph_triangle_count_result_get_vertices( cugraph_triangle_count_result_t* result ) - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_triangle_count_result_get_counts( cugraph_triangle_count_result_t* result ) - + cdef void \ cugraph_triangle_count_result_free( cugraph_triangle_count_result_t* result ) - + cdef cugraph_error_code_t \ cugraph_triangle_count( const cugraph_resource_handle_t* handle, @@ -83,7 +83,7 @@ cdef extern from "cugraph_c/community_algorithms.h": cugraph_hierarchical_clustering_result_get_clusters( cugraph_hierarchical_clustering_result_t* result ) - + cdef double cugraph_hierarchical_clustering_result_get_modularity( cugraph_hierarchical_clustering_result_t* result ) @@ -104,7 +104,7 @@ cdef extern from "cugraph_c/community_algorithms.h": cugraph_hierarchical_clustering_result_t** result, cugraph_error_t** error ) - + # extract_ego cdef cugraph_error_code_t \ cugraph_extract_ego( @@ -116,7 +116,7 @@ cdef extern from "cugraph_c/community_algorithms.h": cugraph_induced_subgraph_result_t** result, cugraph_error_t** error ) - + # leiden ctypedef struct cugraph_hierarchical_clustering_result_t: pass @@ -130,7 +130,7 @@ cdef extern from "cugraph_c/community_algorithms.h": cugraph_hierarchical_clustering_result_get_clusters( 
cugraph_hierarchical_clustering_result_t* result ) - + cdef double cugraph_hierarchical_clustering_result_get_modularity( cugraph_hierarchical_clustering_result_t* result ) @@ -153,23 +153,39 @@ cdef extern from "cugraph_c/community_algorithms.h": cugraph_error_t** error ) ########################################################################### + # Legacy ECG + cdef cugraph_error_code_t \ + cugraph_legacy_ecg( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + double min_weight, + size_t ensemble_size, + bool_t do_expensive_check, + cugraph_hierarchical_clustering_result_t** result, + cugraph_error_t** error + ) + # ECG cdef cugraph_error_code_t \ cugraph_ecg( const cugraph_resource_handle_t* handle, + cugraph_rng_state_t* rng_state, cugraph_graph_t* graph, double min_weight, size_t ensemble_size, + size_t max_level, + double threshold, + double resolution, bool_t do_expensive_check, cugraph_hierarchical_clustering_result_t** result, cugraph_error_t** error ) - + ########################################################################### # Clustering ctypedef struct cugraph_clustering_result_t: pass - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_clustering_result_get_vertices( cugraph_clustering_result_t* result @@ -179,7 +195,7 @@ cdef extern from "cugraph_c/community_algorithms.h": cugraph_clustering_result_get_clusters( cugraph_clustering_result_t* result ) - + cdef void \ cugraph_clustering_result_free( cugraph_clustering_result_t* result @@ -200,7 +216,7 @@ cdef extern from "cugraph_c/community_algorithms.h": cugraph_clustering_result_t** result, cugraph_error_t** error ) - + # Spectral modularity maximization cdef cugraph_error_code_t \ cugraph_spectral_modularity_maximization( @@ -216,7 +232,7 @@ cdef extern from "cugraph_c/community_algorithms.h": cugraph_clustering_result_t** result, cugraph_error_t** error ) - + # Analyze clustering modularity cdef cugraph_error_code_t \ cugraph_analyze_clustering_modularity( @@ -228,7 
+244,7 @@ cdef extern from "cugraph_c/community_algorithms.h": double* score, cugraph_error_t** error ) - + # Analyze clustering edge cut cdef cugraph_error_code_t \ cugraph_analyze_clustering_edge_cut( @@ -240,7 +256,7 @@ cdef extern from "cugraph_c/community_algorithms.h": double* score, cugraph_error_t** error ) - + # Analyze clustering ratio cut cdef cugraph_error_code_t \ cugraph_analyze_clustering_ratio_cut( diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/core_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/core_algorithms.pxd index 4d3509e8b7f..ed25faa33f4 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/core_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/core_algorithms.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -36,7 +36,7 @@ cdef extern from "cugraph_c/core_algorithms.h": # core number ctypedef struct cugraph_core_result_t: pass - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_core_result_get_vertices( cugraph_core_result_t* result @@ -46,7 +46,7 @@ cdef extern from "cugraph_c/core_algorithms.h": cugraph_core_result_get_core_numbers( cugraph_core_result_t* result ) - + cdef void \ cugraph_core_result_free( cugraph_core_result_t* result @@ -66,7 +66,7 @@ cdef extern from "cugraph_c/core_algorithms.h": cugraph_core_result_t** result, cugraph_error_t** error ) - + ########################################################################### # k-core ctypedef struct cugraph_k_core_result_t: @@ -76,22 +76,22 @@ cdef extern from "cugraph_c/core_algorithms.h": cugraph_k_core_result_get_src_vertices( cugraph_k_core_result_t* result ) - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_k_core_result_get_dst_vertices( cugraph_k_core_result_t* result ) - + cdef 
cugraph_type_erased_device_array_view_t* \ cugraph_k_core_result_get_weights( cugraph_k_core_result_t* result ) - + cdef void \ cugraph_k_core_result_free( cugraph_k_core_result_t* result ) - + cdef cugraph_error_code_t \ cugraph_core_result_create( const cugraph_resource_handle_t* handle, @@ -100,7 +100,7 @@ cdef extern from "cugraph_c/core_algorithms.h": cugraph_core_result_t** core_result, cugraph_error_t** error ) - + cdef cugraph_error_code_t \ cugraph_k_core( const cugraph_resource_handle_t* handle, diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/graph.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/graph.pxd index 28a9f5a3be5..4247bcc1b2a 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/graph.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/graph.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -51,7 +51,7 @@ cdef extern from "cugraph_c/graph.h": bool_t check, cugraph_graph_t** graph, cugraph_error_t** error) - + # Supports isolated vertices cdef cugraph_error_code_t \ cugraph_graph_create_sg( @@ -76,7 +76,7 @@ cdef extern from "cugraph_c/graph.h": cugraph_sg_graph_free( cugraph_graph_t* graph ) - + # FIXME: Might want to delete 'cugraph_sg_graph_free' and replace # 'cugraph_mg_graph_free' by 'cugraph_graph_free' cdef void \ @@ -105,7 +105,7 @@ cdef extern from "cugraph_c/graph.h": cugraph_mg_graph_free( cugraph_graph_t* graph ) - + cdef cugraph_error_code_t \ cugraph_sg_graph_create_from_csr( const cugraph_resource_handle_t* handle, @@ -121,7 +121,7 @@ cdef extern from "cugraph_c/graph.h": cugraph_graph_t** graph, cugraph_error_t** error ) - + cdef cugraph_error_code_t \ cugraph_graph_create_sg_from_csr( const cugraph_resource_handle_t* handle, @@ -137,12 +137,12 @@ cdef extern from "cugraph_c/graph.h": cugraph_graph_t** graph, cugraph_error_t** error ) - + cdef void \ cugraph_sg_graph_free( cugraph_graph_t* graph ) - + cdef cugraph_error_code_t \ cugraph_mg_graph_create( const cugraph_resource_handle_t* handle, @@ -158,7 +158,7 @@ cdef extern from "cugraph_c/graph.h": cugraph_graph_t** graph, cugraph_error_t** error ) - + cdef cugraph_error_code_t \ cugraph_graph_create_mg( const cugraph_resource_handle_t* handle, @@ -176,7 +176,7 @@ cdef extern from "cugraph_c/graph.h": bool_t do_expensive_check, cugraph_graph_t** graph, cugraph_error_t** error) - + cdef void \ cugraph_mg_graph_free( cugraph_graph_t* graph diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd index 8b3a629956c..90bc041e5f0 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. 
+# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -42,7 +42,7 @@ cdef extern from "cugraph_c/graph_functions.h": #""" ctypedef struct cugraph_vertex_pairs_t: pass - + from pylibcugraph._cugraph_c.error cimport ( cugraph_error_code_t, @@ -61,22 +61,22 @@ cdef extern from "cugraph_c/graph_functions.h": # vertex_pairs ctypedef struct cugraph_vertex_pairs_t: pass - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_vertex_pairs_get_first( cugraph_vertex_pairs_t* vertex_pairs ) - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_vertex_pairs_get_second( cugraph_vertex_pairs_t* vertex_pairs ) - + cdef void \ cugraph_vertex_pairs_free( cugraph_vertex_pairs_t* vertex_pairs ) - + cdef cugraph_error_code_t \ cugraph_create_vertex_pairs( const cugraph_resource_handle_t* handle, @@ -86,21 +86,21 @@ cdef extern from "cugraph_c/graph_functions.h": cugraph_vertex_pairs_t** vertex_pairs, cugraph_error_t** error ) - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_vertex_pairs_get_first( cugraph_vertex_pairs_t* vertex_pairs ) - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_vertex_pairs_get_second( cugraph_vertex_pairs_t* vertex_pairs ) - + cdef void cugraph_vertex_pairs_free( cugraph_vertex_pairs_t* vertex_pairs ) - + cdef cugraph_error_code_t cugraph_two_hop_neighbors( const cugraph_resource_handle_t* handle, const cugraph_graph_t* graph, @@ -117,7 +117,7 @@ cdef extern from "cugraph_c/graph_functions.h": cugraph_vertex_pairs_t** result, cugraph_error_t** error ) - + ########################################################################### # induced_subgraph ctypedef struct cugraph_induced_subgraph_result_t: @@ -127,37 +127,37 @@ cdef extern from "cugraph_c/graph_functions.h": cugraph_induced_subgraph_get_sources( cugraph_induced_subgraph_result_t* induced_subgraph ) - + 
cdef cugraph_type_erased_device_array_view_t* \ cugraph_induced_subgraph_get_destinations( cugraph_induced_subgraph_result_t* induced_subgraph ) - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_induced_subgraph_get_edge_weights( cugraph_induced_subgraph_result_t* induced_subgraph ) - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_induced_subgraph_get_edge_ids( cugraph_induced_subgraph_result_t* induced_subgraph ) - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_induced_subgraph_get_edge_type_ids( cugraph_induced_subgraph_result_t* induced_subgraph ) - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_induced_subgraph_get_subgraph_offsets( cugraph_induced_subgraph_result_t* induced_subgraph ) - + cdef void \ cugraph_induced_subgraph_result_free( cugraph_induced_subgraph_result_t* induced_subgraph ) - + cdef cugraph_error_code_t \ cugraph_extract_induced_subgraph( const cugraph_resource_handle_t* handle, diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/labeling_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/labeling_algorithms.pxd index 7c911235a54..53c4c382d58 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/labeling_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/labeling_algorithms.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -41,17 +41,17 @@ cdef extern from "cugraph_c/labeling_algorithms.h": cugraph_labeling_result_get_vertices( cugraph_labeling_result_t* result ) - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_labeling_result_get_labels( cugraph_labeling_result_t* result ) - + cdef void \ cugraph_labeling_result_free( cugraph_labeling_result_t* result ) - + cdef cugraph_error_code_t \ cugraph_weakly_connected_components( const cugraph_resource_handle_t* handle, @@ -60,4 +60,3 @@ cdef extern from "cugraph_c/labeling_algorithms.h": cugraph_labeling_result_t** result, cugraph_error_t** error ) - \ No newline at end of file diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/resource_handle.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/resource_handle.pxd index e9e74723e06..f3f43d55dff 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/resource_handle.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/resource_handle.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -29,7 +29,7 @@ cdef extern from "cugraph_c/resource_handle.h": FLOAT32 FLOAT64 SIZE_T - + ctypedef data_type_id_t cugraph_data_type_id_t ctypedef int8_t byte_t diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/similarity_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/similarity_algorithms.pxd index 0d98bb8e14a..406094f18d5 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/similarity_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/similarity_algorithms.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -45,12 +45,12 @@ cdef extern from "cugraph_c/similarity_algorithms.h": cugraph_similarity_result_get_similarity( cugraph_similarity_result_t* result ) - + cdef void \ cugraph_similarity_result_free( cugraph_similarity_result_t* result ) - + ########################################################################### # jaccard coefficients cdef cugraph_error_code_t \ diff --git a/python/pylibcugraph/pylibcugraph/analyze_clustering_modularity.pyx b/python/pylibcugraph/pylibcugraph/analyze_clustering_modularity.pyx index 2e7c1d2f649..907d08bef67 100644 --- a/python/pylibcugraph/pylibcugraph/analyze_clustering_modularity.pyx +++ b/python/pylibcugraph/pylibcugraph/analyze_clustering_modularity.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -133,7 +133,7 @@ def analyze_clustering_modularity(ResourceHandle resource_handle, &score, &error_ptr) assert_success(error_code, error_ptr, "cugraph_analyze_clustering_modularity") - + if vertex is not None: cugraph_type_erased_device_array_view_free(vertex_view_ptr) if cluster is not None: diff --git a/python/pylibcugraph/pylibcugraph/balanced_cut_clustering.pyx b/python/pylibcugraph/pylibcugraph/balanced_cut_clustering.pyx index a1a5c8182eb..e0ec2f77233 100644 --- a/python/pylibcugraph/pylibcugraph/balanced_cut_clustering.pyx +++ b/python/pylibcugraph/pylibcugraph/balanced_cut_clustering.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -90,7 +90,7 @@ def balanced_cut_clustering(ResourceHandle resource_handle, kmean_max_iter: size_t Specifies the maximum number of iterations for the k-means solver. - + do_expensive_check : bool_t If True, performs more extensive tests on the inputs to ensure validitity, at the expense of increased run time. diff --git a/python/pylibcugraph/pylibcugraph/betweenness_centrality.pyx b/python/pylibcugraph/pylibcugraph/betweenness_centrality.pyx index 5087314c725..85df88b9001 100644 --- a/python/pylibcugraph/pylibcugraph/betweenness_centrality.pyx +++ b/python/pylibcugraph/pylibcugraph/betweenness_centrality.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -93,7 +93,7 @@ def betweenness_centrality(ResourceHandle resource_handle, Using None defaults to a hash of process id, time, and hostname If k is either None or list or cudf objects: random_state parameter is ignored. - + normalized : bool_t Normalization will ensure that values are in [0, 1]. @@ -102,7 +102,7 @@ def betweenness_centrality(ResourceHandle resource_handle, do_expensive_check : bool_t A flag to run expensive checks for input arguments if True. 
- + Returns ------- @@ -113,7 +113,7 @@ def betweenness_centrality(ResourceHandle resource_handle, if isinstance(k, int): # randomly select vertices - + #'select_random_vertices' internally creates a # 'pylibcugraph.random.CuGraphRandomState' vertex_list = select_random_vertices( @@ -150,7 +150,7 @@ def betweenness_centrality(ResourceHandle resource_handle, cugraph_centrality_result_get_vertices(result_ptr) cdef cugraph_type_erased_device_array_view_t* values_ptr = \ cugraph_centrality_result_get_values(result_ptr) - + cupy_vertices = copy_to_cupy_array(c_resource_handle_ptr, vertices_ptr) cupy_values = copy_to_cupy_array(c_resource_handle_ptr, values_ptr) diff --git a/python/pylibcugraph/pylibcugraph/bfs.pyx b/python/pylibcugraph/pylibcugraph/bfs.pyx index 3034dcc8cb1..e65e94a8020 100644 --- a/python/pylibcugraph/pylibcugraph/bfs.pyx +++ b/python/pylibcugraph/pylibcugraph/bfs.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -52,8 +52,8 @@ from pylibcugraph.graphs cimport ( _GPUGraph, ) -def bfs(ResourceHandle handle, _GPUGraph graph, - sources, bool_t direction_optimizing, int32_t depth_limit, +def bfs(ResourceHandle handle, _GPUGraph graph, + sources, bool_t direction_optimizing, int32_t depth_limit, bool_t compute_predecessors, bool_t do_expensive_check): """ Performs a Breadth-first search starting from the provided sources. 
@@ -110,10 +110,10 @@ def bfs(ResourceHandle handle, _GPUGraph graph, >>> weights = G.edgelist.edgelist_df['weights'] >>> >>> sg = SGGraph( - >>> resource_handle = handle, - >>> graph_properties = GraphProperties(is_multigraph=G.is_multigraph()), - >>> src_array = srcs, - >>> dst_array = dsts, + >>> resource_handle = handle, + >>> graph_properties = GraphProperties(is_multigraph=G.is_multigraph()), + >>> src_array = srcs, + >>> dst_array = dsts, >>> weight_array = weights, >>> store_transposed=False, >>> renumber=False, @@ -121,7 +121,7 @@ def bfs(ResourceHandle handle, _GPUGraph graph, >>> ) >>> >>> res = pylibcugraph_bfs( - >>> handle, + >>> handle, >>> sg, >>> cudf.Series([0], dtype='int32'), >>> False, @@ -164,7 +164,7 @@ def bfs(ResourceHandle handle, _GPUGraph graph, cai_sources_ptr, len(sources), get_c_type_from_numpy_type(sources.dtype)) - + cdef cugraph_paths_result_t* result_ptr error_code = cugraph_bfs( @@ -186,7 +186,7 @@ def bfs(ResourceHandle handle, _GPUGraph graph, cdef cugraph_type_erased_device_array_view_t* predecessors_ptr = \ cugraph_paths_result_get_predecessors(result_ptr) - + cdef cugraph_type_erased_device_array_view_t* vertices_ptr = \ cugraph_paths_result_get_vertices(result_ptr) @@ -194,7 +194,7 @@ def bfs(ResourceHandle handle, _GPUGraph graph, cupy_distances = copy_to_cupy_array(c_resource_handle_ptr, distances_ptr) cupy_predecessors = copy_to_cupy_array(c_resource_handle_ptr, predecessors_ptr) cupy_vertices = copy_to_cupy_array(c_resource_handle_ptr, vertices_ptr) - + # deallocate the no-longer needed result struct cugraph_paths_result_free(result_ptr) diff --git a/python/pylibcugraph/pylibcugraph/components/_connectivity.pyx b/python/pylibcugraph/pylibcugraph/components/_connectivity.pyx index 02e7549d1c5..9f5b84f260d 100644 --- a/python/pylibcugraph/pylibcugraph/components/_connectivity.pyx +++ b/python/pylibcugraph/pylibcugraph/components/_connectivity.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. 
+# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -128,4 +128,3 @@ def strongly_connected_components(offsets, indices, weights, num_verts, num_edge cdef cugraph_cc_t connect_type=CUGRAPH_STRONG connected_components(g, connect_type, c_labels) - diff --git a/python/pylibcugraph/pylibcugraph/core_number.pyx b/python/pylibcugraph/pylibcugraph/core_number.pyx index 7d0c42f7dd0..50ce5bfe965 100644 --- a/python/pylibcugraph/pylibcugraph/core_number.pyx +++ b/python/pylibcugraph/pylibcugraph/core_number.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -31,7 +31,7 @@ from pylibcugraph._cugraph_c.array cimport ( from pylibcugraph._cugraph_c.graph cimport ( cugraph_graph_t, ) -from pylibcugraph._cugraph_c.core_algorithms cimport ( +from pylibcugraph._cugraph_c.core_algorithms cimport ( cugraph_core_result_t, cugraph_core_number, cugraph_k_core_degree_type_t, @@ -63,17 +63,17 @@ def core_number(ResourceHandle resource_handle, resource_handle: ResourceHandle Handle to the underlying device and host resource needed for referencing data and running algorithms. - + graph : SGGraph or MGGraph The input graph, for either Single or Multi-GPU operations. - + degree_type: str This option determines if the core number computation should be based on input, output, or both directed edges, with valid values being "incoming", "outgoing", and "bidirectional" respectively. This option is currently ignored in this release, and setting it will result in a warning. 
- + do_expensive_check: bool If True, performs more extensive tests on the inputs to ensure validity, at the expense of increased run time. diff --git a/python/pylibcugraph/pylibcugraph/ecg.pyx b/python/pylibcugraph/pylibcugraph/ecg.pyx index 4188aaa213e..4e5407348e3 100644 --- a/python/pylibcugraph/pylibcugraph/ecg.pyx +++ b/python/pylibcugraph/pylibcugraph/ecg.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -31,6 +31,7 @@ from pylibcugraph._cugraph_c.graph cimport ( ) from pylibcugraph._cugraph_c.community_algorithms cimport ( cugraph_hierarchical_clustering_result_t, + cugraph_legacy_ecg, cugraph_ecg, cugraph_hierarchical_clustering_result_get_vertices, cugraph_hierarchical_clustering_result_get_clusters, @@ -72,7 +73,7 @@ def ecg(ResourceHandle resource_handle, graph : SGGraph The input graph. - + min_weight : double, optional (default=0.5) The minimum value to assign as an edgeweight in the ECG algorithm. It should be a value in the range [0,1] usually left as the default @@ -82,7 +83,7 @@ def ecg(ResourceHandle resource_handle, The number of graph permutations to use for the ensemble. The default value is 16, larger values may produce higher quality partitions for some graphs. - + do_expensive_check : bool_t If True, performs more extensive tests on the inputs to ensure validitity, at the expense of increased run time. 
@@ -124,7 +125,7 @@ def ecg(ResourceHandle resource_handle, cdef cugraph_error_code_t error_code cdef cugraph_error_t* error_ptr - error_code = cugraph_ecg(c_resource_handle_ptr, + error_code = cugraph_legacy_ecg(c_resource_handle_ptr, c_graph_ptr, min_weight, ensemble_size, diff --git a/python/pylibcugraph/pylibcugraph/edge_betweenness_centrality.pyx b/python/pylibcugraph/pylibcugraph/edge_betweenness_centrality.pyx index e1dae1ff10a..75a950eaa34 100644 --- a/python/pylibcugraph/pylibcugraph/edge_betweenness_centrality.pyx +++ b/python/pylibcugraph/pylibcugraph/edge_betweenness_centrality.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -92,13 +92,13 @@ def edge_betweenness_centrality(ResourceHandle resource_handle, Using None defaults to a hash of process id, time, and hostname If k is either None or list or cudf objects: random_state parameter is ignored. - + normalized : bool_t Normalization will ensure that values are in [0, 1]. do_expensive_check : bool_t A flag to run expensive checks for input arguments if True. 
- + Returns ------- A tuple of device arrays corresponding to the sources, destinations, edge @@ -140,7 +140,7 @@ def edge_betweenness_centrality(ResourceHandle resource_handle, if isinstance(k, int): # randomly select vertices - + #'select_random_vertices' internally creates a # 'pylibcugraph.random.CuGraphRandomState' vertex_list = select_random_vertices( @@ -179,14 +179,14 @@ def edge_betweenness_centrality(ResourceHandle resource_handle, cugraph_edge_centrality_result_get_dst_vertices(result_ptr) cdef cugraph_type_erased_device_array_view_t* values_ptr = \ cugraph_edge_centrality_result_get_values(result_ptr) - + if graph.edge_id_view_ptr is NULL and graph.edge_id_view_ptr_ptr is NULL: cupy_edge_ids = None else: edge_ids_ptr = cugraph_edge_centrality_result_get_edge_ids(result_ptr) cupy_edge_ids = copy_to_cupy_array(c_resource_handle_ptr, edge_ids_ptr) - - + + cupy_src_vertices = copy_to_cupy_array(c_resource_handle_ptr, src_ptr) cupy_dst_vertices = copy_to_cupy_array(c_resource_handle_ptr, dst_ptr) cupy_values = copy_to_cupy_array(c_resource_handle_ptr, values_ptr) diff --git a/python/pylibcugraph/pylibcugraph/egonet.pyx b/python/pylibcugraph/pylibcugraph/egonet.pyx index e7237cc3ba4..070ea1fdd85 100644 --- a/python/pylibcugraph/pylibcugraph/egonet.pyx +++ b/python/pylibcugraph/pylibcugraph/egonet.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -153,10 +153,10 @@ def ego_graph(ResourceHandle resource_handle, # for perfomance improvement cupy_sources = copy_to_cupy_array( c_resource_handle_ptr, sources_ptr) - + cupy_destinations = copy_to_cupy_array( c_resource_handle_ptr, destinations_ptr) - + if edge_weights_ptr is not NULL: cupy_edge_weights = copy_to_cupy_array( c_resource_handle_ptr, edge_weights_ptr) diff --git a/python/pylibcugraph/pylibcugraph/eigenvector_centrality.pyx b/python/pylibcugraph/pylibcugraph/eigenvector_centrality.pyx index 568f072ee3d..baa69eb2e30 100644 --- a/python/pylibcugraph/pylibcugraph/eigenvector_centrality.pyx +++ b/python/pylibcugraph/pylibcugraph/eigenvector_centrality.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -127,7 +127,7 @@ def eigenvector_centrality(ResourceHandle resource_handle, cugraph_centrality_result_get_vertices(result_ptr) cdef cugraph_type_erased_device_array_view_t* values_ptr = \ cugraph_centrality_result_get_values(result_ptr) - + cupy_vertices = copy_to_cupy_array(c_resource_handle_ptr, vertices_ptr) cupy_values = copy_to_cupy_array(c_resource_handle_ptr, values_ptr) diff --git a/python/pylibcugraph/pylibcugraph/generate_rmat_edgelist.pyx b/python/pylibcugraph/pylibcugraph/generate_rmat_edgelist.pyx index d09d60ff15b..f38ad21d3b0 100644 --- a/python/pylibcugraph/pylibcugraph/generate_rmat_edgelist.pyx +++ b/python/pylibcugraph/pylibcugraph/generate_rmat_edgelist.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -82,19 +82,19 @@ def generate_rmat_edgelist(ResourceHandle resource_handle, resource_handle : ResourceHandle Handle to the underlying device resources needed for referencing data and running algorithms. - + random_state : int , optional Random state to use when generating samples. Optional argument, defaults to a hash of process id, time, and hostname. (See pylibcugraph.random.CuGraphRandomState) - + scale : size_t Scale factor to set the number of vertices in the graph Vertex IDs have values in [0, V), where V = 1 << 'scale' - + num_edges : size_t Number of edges to generate - + a : double Probability of the edge being in the first partition The Graph 500 spec sets this value to 0.57 @@ -106,39 +106,39 @@ def generate_rmat_edgelist(ResourceHandle resource_handle, c : double Probability of the edge being in the third partition The Graph 500 spec sets this value to 0.19 - + clip_and_flip : bool Flag controlling whether to generate edges only in the lower triangular part (including the diagonal) of the graph adjacency matrix (if set to 'true') or not (if set to 'false). - + scramble_vertex_ids : bool Flag controlling whether to scramble vertex ID bits (if set to `true`) or not (if set to `false`); scrambling vertex ID bits breaks correlation between vertex ID values and vertex degrees. - + include_edge_weights : bool Flag controlling whether to generate edges with weights (if set to 'true') or not (if set to 'false'). minimum_weight : double Minimum weight value to generate (if 'include_edge_weights' is 'true') - + maximum_weight : double Maximum weight value to generate (if 'include_edge_weights' is 'true') - + dtype : string The type of weight to generate ("FLOAT32" or "FLOAT64"), ignored unless include_weights is true - + include_edge_ids : bool Flag controlling whether to generate edges with ids (if set to 'true') or not (if set to 'false'). 
- + include_edge_types : bool Flag controlling whether to generate edges with types (if set to 'true') or not (if set to 'false'). - + min_edge_type_value : int Minimum edge type to generate if 'include_edge_types' is 'true' otherwise, this parameter is ignored. @@ -146,7 +146,7 @@ def generate_rmat_edgelist(ResourceHandle resource_handle, max_edge_type_value : int Maximum edge type to generate if 'include_edge_types' is 'true' otherwise, this paramter is ignored. - + multi_gpu : bool Flag if the COO is being created on multiple GPUs @@ -188,7 +188,7 @@ def generate_rmat_edgelist(ResourceHandle resource_handle, cdef cugraph_type_erased_device_array_view_t* \ destinations_view_ptr = cugraph_coo_get_destinations(result_coo_ptr) - + cdef cugraph_type_erased_device_array_view_t* edge_weights_view_ptr cupy_edge_weights = None @@ -209,13 +209,13 @@ def generate_rmat_edgelist(ResourceHandle resource_handle, edge_weights_view_ptr = cugraph_coo_get_edge_weights(result_coo_ptr) cupy_edge_weights = copy_to_cupy_array(c_resource_handle_ptr, edge_weights_view_ptr) - + if include_edge_ids: error_code = cugraph_generate_edge_ids(c_resource_handle_ptr, result_coo_ptr, multi_gpu, &error_ptr) - + assert_success(error_code, error_ptr, "generate_edge_ids") edge_ids_view_ptr = cugraph_coo_get_edge_id(result_coo_ptr) @@ -228,7 +228,7 @@ def generate_rmat_edgelist(ResourceHandle resource_handle, min_edge_type_value, max_edge_type_value, &error_ptr) - + assert_success(error_code, error_ptr, "generate_edge_types") edge_type_view_ptr = cugraph_coo_get_edge_type(result_coo_ptr) @@ -236,10 +236,10 @@ def generate_rmat_edgelist(ResourceHandle resource_handle, - - + + cupy_sources = copy_to_cupy_array(c_resource_handle_ptr, sources_view_ptr) - cupy_destinations = copy_to_cupy_array(c_resource_handle_ptr, destinations_view_ptr) + cupy_destinations = copy_to_cupy_array(c_resource_handle_ptr, destinations_view_ptr) cugraph_coo_free(result_coo_ptr) diff --git 
a/python/pylibcugraph/pylibcugraph/generate_rmat_edgelists.pyx b/python/pylibcugraph/pylibcugraph/generate_rmat_edgelists.pyx index d5a89f8a222..32af0c13fc0 100644 --- a/python/pylibcugraph/pylibcugraph/generate_rmat_edgelists.pyx +++ b/python/pylibcugraph/pylibcugraph/generate_rmat_edgelists.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -88,24 +88,24 @@ def generate_rmat_edgelists(ResourceHandle resource_handle, resource_handle : ResourceHandle Handle to the underlying device resources needed for referencing data and running algorithms. - + random_state : int , optional Random state to use when generating samples. Optional argument, defaults to a hash of process id, time, and hostname. (See pylibcugraph.random.CuGraphRandomState) - + n_edgelists : size_t Number of edge lists (graphs) to generate - + min_scale : size_t Scale factor to set the minimum number of vertices in the graph max_scale : size_t Scale factor to set the maximum number of vertices in the graph - + edge_factor : size_t Average number of edges per vertex to generate - + size_distribution : int Distribution of the graph sizes, impacts the scale parameter of the R-MAT generator. @@ -115,39 +115,39 @@ def generate_rmat_edgelists(ResourceHandle resource_handle, Edges distribution for each graph, impacts how R-MAT parameters a,b,c,d, are set. 
'0' for POWER_LAW distribution and '1' for UNIFORM distribution - + clip_and_flip : bool Flag controlling whether to generate edges only in the lower triangular part (including the diagonal) of the graph adjacency matrix (if set to 'true') or not (if set to 'false') - + scramble_vertex_ids : bool Flag controlling whether to scramble vertex ID bits (if set to `true`) or not (if set to `false`); scrambling vertex ID bits breaks correlation between vertex ID values and vertex degrees. - + include_edge_weights : bool Flag controlling whether to generate edges with weights (if set to 'true') or not (if set to 'false'). minimum_weight : double Minimum weight value to generate (if 'include_edge_weights' is 'true') - + maximum_weight : double Maximum weight value to generate (if 'include_edge_weights' is 'true') - + dtype : string The type of weight to generate ("FLOAT32" or "FLOAT64"), ignored unless include_weights is true - + include_edge_ids : bool Flag controlling whether to generate edges with ids (if set to 'true') or not (if set to 'false'). - + include_edge_types : bool Flag controlling whether to generate edges with types (if set to 'true') or not (if set to 'false'). - + min_edge_type_value : int Minimum edge type to generate if 'include_edge_types' is 'true' otherwise, this parameter is ignored. 
@@ -178,12 +178,12 @@ def generate_rmat_edgelists(ResourceHandle resource_handle, cdef cugraph_generator_distribution_t size_distribution_ cdef cugraph_generator_distribution_t edge_distribution_ - + if size_distribution == 0: size_distribution_ = cugraph_generator_distribution_t.POWER_LAW else: size_distribution_ = cugraph_generator_distribution_t.UNIFORM - + if edge_distribution == 0: edge_distribution_ = cugraph_generator_distribution_t.POWER_LAW else: @@ -237,7 +237,7 @@ def generate_rmat_edgelists(ResourceHandle resource_handle, edge_weights_view_ptr = cugraph_coo_get_edge_weights(result_coo_ptr) cupy_edge_weights = copy_to_cupy_array(c_resource_handle_ptr, edge_weights_view_ptr) - + if include_edge_ids: @@ -245,7 +245,7 @@ def generate_rmat_edgelists(ResourceHandle resource_handle, result_coo_ptr, multi_gpu, &error_ptr) - + assert_success(error_code, error_ptr, "generate_edge_ids") edge_ids_view_ptr = cugraph_coo_get_edge_id(result_coo_ptr) @@ -258,7 +258,7 @@ def generate_rmat_edgelists(ResourceHandle resource_handle, min_edge_type_value, max_edge_type_value, &error_ptr) - + assert_success(error_code, error_ptr, "generate_edge_types") edge_type_view_ptr = cugraph_coo_get_edge_type(result_coo_ptr) diff --git a/python/pylibcugraph/pylibcugraph/graphs.pxd b/python/pylibcugraph/pylibcugraph/graphs.pxd index dac69e0ad04..f9dd690e46f 100644 --- a/python/pylibcugraph/pylibcugraph/graphs.pxd +++ b/python/pylibcugraph/pylibcugraph/graphs.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -26,12 +26,11 @@ cdef class _GPUGraph: cdef cugraph_graph_t* c_graph_ptr cdef cugraph_type_erased_device_array_view_t* edge_id_view_ptr cdef cugraph_type_erased_device_array_view_t** edge_id_view_ptr_ptr - cdef cugraph_type_erased_device_array_view_t* weights_view_ptr - cdef cugraph_type_erased_device_array_view_t** weights_view_ptr_ptr + cdef cugraph_type_erased_device_array_view_t* weights_view_ptr + cdef cugraph_type_erased_device_array_view_t** weights_view_ptr_ptr cdef class SGGraph(_GPUGraph): pass cdef class MGGraph(_GPUGraph): pass - diff --git a/python/pylibcugraph/pylibcugraph/hits.pyx b/python/pylibcugraph/pylibcugraph/hits.pyx index 4524a4f70df..3204ac2ba57 100644 --- a/python/pylibcugraph/pylibcugraph/hits.pyx +++ b/python/pylibcugraph/pylibcugraph/hits.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -78,7 +78,7 @@ def hits(ResourceHandle resource_handle, graph : SGGraph or MGGraph The input graph, for either Single or Multi-GPU operations. - + tol : float, optional (default=1.0e-5) Set the tolerance the approximation, this parameter should be a small magnitude value. This parameter is not currently supported. 
@@ -104,7 +104,7 @@ def hits(ResourceHandle resource_handle, A tuple of device arrays, where the third item in the tuple is a device array containing the vertex identifiers, the first and second items are device arrays containing respectively the hubs and authorities values for the corresponding - vertices + vertices Examples -------- @@ -118,13 +118,13 @@ def hits(ResourceHandle resource_handle, cdef cugraph_type_erased_device_array_view_t* initial_hubs_guess_vertices_view_ptr = NULL cdef cugraph_type_erased_device_array_view_t* initial_hubs_guess_values_view_ptr = NULL - # FIXME: Add check ensuring that both initial_hubs_guess_vertices + # FIXME: Add check ensuring that both initial_hubs_guess_vertices # and initial_hubs_guess_values are passed when calling only pylibcugraph HITS. # This is already True for cugraph HITS - - if initial_hubs_guess_vertices is not None: + + if initial_hubs_guess_vertices is not None: assert_CAI_type(initial_hubs_guess_vertices, "initial_hubs_guess_vertices") - + cai_initial_hubs_guess_vertices_ptr = \ initial_hubs_guess_vertices.__cuda_array_interface__["data"][0] @@ -133,7 +133,7 @@ def hits(ResourceHandle resource_handle, cai_initial_hubs_guess_vertices_ptr, len(initial_hubs_guess_vertices), get_c_type_from_numpy_type(initial_hubs_guess_vertices.dtype)) - + if initial_hubs_guess_values is not None: assert_CAI_type(initial_hubs_guess_values, "initial_hubs_guess_values") @@ -179,13 +179,13 @@ def hits(ResourceHandle resource_handle, cupy_hubs = copy_to_cupy_array(c_resource_handle_ptr, hubs_ptr) cupy_authorities = copy_to_cupy_array(c_resource_handle_ptr, authorities_ptr) - + cugraph_hits_result_free(result_ptr) if initial_hubs_guess_vertices is not None: cugraph_type_erased_device_array_view_free( initial_hubs_guess_vertices_view_ptr) - + if initial_hubs_guess_values is not None: cugraph_type_erased_device_array_view_free( initial_hubs_guess_values_view_ptr) diff --git a/python/pylibcugraph/pylibcugraph/induced_subgraph.pyx 
b/python/pylibcugraph/pylibcugraph/induced_subgraph.pyx index 99b89ec2a58..6fd6d7b9af3 100644 --- a/python/pylibcugraph/pylibcugraph/induced_subgraph.pyx +++ b/python/pylibcugraph/pylibcugraph/induced_subgraph.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -69,10 +69,10 @@ def induced_subgraph(ResourceHandle resource_handle, graph : SGGraph or MGGraph The input graph. - + subgraph_vertices : cupy array array of vertices to include in extracted subgraph. - + subgraph_offsets : cupy array array of subgraph offsets into subgraph_vertices. diff --git a/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx b/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx index 9f98b4f37b0..f588237942b 100644 --- a/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx +++ b/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -68,13 +68,13 @@ cdef class SamplingResult: if self.c_sample_result_ptr is NULL: raise ValueError("pointer not set, must call set_ptr() with a " "non-NULL value first.") - + cdef cugraph_type_erased_device_array_view_t* device_array_view_ptr = ( cugraph_sample_result_get_major_offsets(self.c_sample_result_ptr) ) if device_array_view_ptr is NULL: return None - + return create_cupy_array_view_for_device_ptr(device_array_view_ptr, self) @@ -87,7 +87,7 @@ cdef class SamplingResult: ) if device_array_view_ptr is NULL: return None - + return create_cupy_array_view_for_device_ptr(device_array_view_ptr, self) @@ -100,7 +100,7 @@ cdef class SamplingResult: ) if device_array_view_ptr is NULL: return None - + return create_cupy_array_view_for_device_ptr(device_array_view_ptr, self) @@ -114,7 +114,7 @@ cdef class SamplingResult: ) if device_array_view_ptr is NULL: return None - + return create_cupy_array_view_for_device_ptr(device_array_view_ptr, self) @@ -128,7 +128,7 @@ cdef class SamplingResult: ) if device_array_view_ptr is NULL: return None - + return create_cupy_array_view_for_device_ptr(device_array_view_ptr, self) @@ -149,7 +149,7 @@ cdef class SamplingResult: def get_indices(self): # Deprecated return self.get_edge_weights() - + def get_edge_ids(self): if self.c_sample_result_ptr is NULL: raise ValueError("pointer not set, must call set_ptr() with a " @@ -177,7 +177,7 @@ cdef class SamplingResult: return create_cupy_array_view_for_device_ptr(device_array_view_ptr, self) - + def get_batch_ids(self): if self.c_sample_result_ptr is NULL: raise ValueError("pointer not set, must call set_ptr() with a " @@ -187,7 +187,7 @@ cdef class SamplingResult: ) if device_array_view_ptr is NULL: return None - + return create_cupy_array_view_for_device_ptr(device_array_view_ptr, self) @@ -200,7 +200,7 @@ cdef class SamplingResult: ) if device_array_view_ptr is NULL: return None - + return 
create_cupy_array_view_for_device_ptr(device_array_view_ptr, self) @@ -214,7 +214,7 @@ cdef class SamplingResult: ) if device_array_view_ptr is NULL: return None - + return create_cupy_array_view_for_device_ptr(device_array_view_ptr, self) @@ -228,7 +228,7 @@ cdef class SamplingResult: ) if device_array_view_ptr is NULL: return None - + return create_cupy_array_view_for_device_ptr(device_array_view_ptr, self) @@ -241,7 +241,7 @@ cdef class SamplingResult: ) if device_array_view_ptr is NULL: return None - + return create_cupy_array_view_for_device_ptr(device_array_view_ptr, self) @@ -254,6 +254,6 @@ cdef class SamplingResult: ) if device_array_view_ptr is NULL: return None - + return create_cupy_array_view_for_device_ptr(device_array_view_ptr, - self) \ No newline at end of file + self) diff --git a/python/pylibcugraph/pylibcugraph/jaccard_coefficients.pyx b/python/pylibcugraph/pylibcugraph/jaccard_coefficients.pyx index 59e94aeb615..9611f2ad884 100644 --- a/python/pylibcugraph/pylibcugraph/jaccard_coefficients.pyx +++ b/python/pylibcugraph/pylibcugraph/jaccard_coefficients.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -67,7 +67,7 @@ def jaccard_coefficients(ResourceHandle resource_handle, bool_t do_expensive_check): """ Compute the Jaccard coefficients for the specified vertex_pairs. - + Note that Jaccard similarity must run on a symmetric graph. Parameters @@ -78,13 +78,13 @@ def jaccard_coefficients(ResourceHandle resource_handle, graph : SGGraph or MGGraph The input graph, for either Single or Multi-GPU operations. - + first : Source of the vertex pair. - + second : Destination of the vertex pair. 
- + use_weight : bool, optional If set to True, the compute weighted jaccard_coefficients( the input graph must be weighted in that case). diff --git a/python/pylibcugraph/pylibcugraph/k_core.pyx b/python/pylibcugraph/pylibcugraph/k_core.pyx index c47cfef7a7a..6e37ee778e9 100644 --- a/python/pylibcugraph/pylibcugraph/k_core.pyx +++ b/python/pylibcugraph/pylibcugraph/k_core.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -31,7 +31,7 @@ from pylibcugraph._cugraph_c.array cimport ( from pylibcugraph._cugraph_c.graph cimport ( cugraph_graph_t, ) -from pylibcugraph._cugraph_c.core_algorithms cimport ( +from pylibcugraph._cugraph_c.core_algorithms cimport ( cugraph_core_result_t, cugraph_k_core_result_t, cugraph_core_result_create, @@ -72,21 +72,21 @@ def k_core(ResourceHandle resource_handle, resource_handle: ResourceHandle Handle to the underlying device and host resource needed for referencing data and running algorithms. - + graph : SGGraph or MGGraph The input graph, for either Single or Multi-GPU operations. - + k : size_t (default=None) Order of the core. This value must not be negative. If set to None the main core is returned. - + degree_type: str This option determines if the core number computation should be based on input, output, or both directed edges, with valid values being "incoming", "outgoing", and "bidirectional" respectively. This option is currently ignored in this release, and setting it will result in a warning. 
- + core_result : device array type Precomputed core number of the nodes of the graph G If set to None, the core numbers of the nodes are calculated @@ -125,12 +125,12 @@ def k_core(ResourceHandle resource_handle, vertices_view_ptr = \ create_cugraph_type_erased_device_array_view_from_py_obj( core_result["vertex"]) - + cdef cugraph_type_erased_device_array_view_t* \ core_numbers_view_ptr = \ create_cugraph_type_erased_device_array_view_from_py_obj( core_result["values"]) - + # Create a core_number result error_code = cugraph_core_result_create(c_resource_handle_ptr, vertices_view_ptr, diff --git a/python/pylibcugraph/pylibcugraph/k_truss_subgraph.pyx b/python/pylibcugraph/pylibcugraph/k_truss_subgraph.pyx index 2c22c618249..6e4cd2e282a 100644 --- a/python/pylibcugraph/pylibcugraph/k_truss_subgraph.pyx +++ b/python/pylibcugraph/pylibcugraph/k_truss_subgraph.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -90,7 +90,7 @@ def k_truss_subgraph(ResourceHandle resource_handle, >>> weights = cupy.asarray( ... [0.1, 0.1, 2.1, 2.1, 1.1, 1.1, 7.2, 7.2, 2.1, 2.1, ... 1.1, 1.1, 7.2, 7.2, 3.2, 3.2, 6.1, 6.1] - ... ,dtype=numpy.float32) + ... 
,dtype=numpy.float32) >>> k = 2 >>> resource_handle = pylibcugraph.ResourceHandle() >>> graph_props = pylibcugraph.GraphProperties( @@ -141,7 +141,7 @@ def k_truss_subgraph(ResourceHandle resource_handle, # for perfomance improvement cupy_sources = copy_to_cupy_array( c_resource_handle_ptr, sources_ptr) - + cupy_destinations = copy_to_cupy_array( c_resource_handle_ptr, destinations_ptr) @@ -150,7 +150,7 @@ def k_truss_subgraph(ResourceHandle resource_handle, c_resource_handle_ptr, edge_weights_ptr) else: cupy_edge_weights = None - + # FIXME: Should we keep the offsets array or just drop it from the final # solution? cupy_subgraph_offsets = copy_to_cupy_array( diff --git a/python/pylibcugraph/pylibcugraph/katz_centrality.pyx b/python/pylibcugraph/pylibcugraph/katz_centrality.pyx index fc78ca89e87..1716a4fc9bf 100644 --- a/python/pylibcugraph/pylibcugraph/katz_centrality.pyx +++ b/python/pylibcugraph/pylibcugraph/katz_centrality.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -95,7 +95,7 @@ def katz_centrality(ResourceHandle resource_handle, do_expensive_check : bool_t A flag to run expensive checks for input arguments if True. 
- + Returns ------- @@ -112,9 +112,9 @@ def katz_centrality(ResourceHandle resource_handle, cdef cugraph_error_code_t error_code cdef cugraph_error_t* error_ptr - cdef uintptr_t cai_betas_ptr + cdef uintptr_t cai_betas_ptr cdef cugraph_type_erased_device_array_view_t* betas_ptr - + if betas is not None: cai_betas_ptr = betas.__cuda_array_interface__["data"][0] betas_ptr = \ @@ -143,7 +143,7 @@ def katz_centrality(ResourceHandle resource_handle, cugraph_centrality_result_get_vertices(result_ptr) cdef cugraph_type_erased_device_array_view_t* values_ptr = \ cugraph_centrality_result_get_values(result_ptr) - + cupy_vertices = copy_to_cupy_array(c_resource_handle_ptr, vertices_ptr) cupy_values = copy_to_cupy_array(c_resource_handle_ptr, values_ptr) diff --git a/python/pylibcugraph/pylibcugraph/node2vec.pyx b/python/pylibcugraph/pylibcugraph/node2vec.pyx index 5d83fc46c3c..b2f2948affe 100644 --- a/python/pylibcugraph/pylibcugraph/node2vec.pyx +++ b/python/pylibcugraph/pylibcugraph/node2vec.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -172,7 +172,7 @@ def node2vec(ResourceHandle resource_handle, cupy_weights = copy_to_cupy_array(c_resource_handle_ptr, weights_ptr) cupy_path_sizes = copy_to_cupy_array(c_resource_handle_ptr, path_sizes_ptr) - + cugraph_random_walk_result_free(result_ptr) cugraph_type_erased_device_array_view_free(seed_view_ptr) diff --git a/python/pylibcugraph/pylibcugraph/overlap_coefficients.pyx b/python/pylibcugraph/pylibcugraph/overlap_coefficients.pyx index 28360121c64..36f414e71ad 100644 --- a/python/pylibcugraph/pylibcugraph/overlap_coefficients.pyx +++ b/python/pylibcugraph/pylibcugraph/overlap_coefficients.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. 
+# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -65,7 +65,7 @@ def overlap_coefficients(ResourceHandle resource_handle, bool_t do_expensive_check): """ Compute the Overlap coefficients for the specified vertex_pairs. - + Note that Overlap similarity must run on a symmetric graph. @@ -77,13 +77,13 @@ def overlap_coefficients(ResourceHandle resource_handle, graph : SGGraph or MGGraph The input graph, for either Single or Multi-GPU operations. - + first : Source of the vertex pair. - + second : Destination of the vertex pair. - + use_weight : bool, optional If set to True, the compute weighted jaccard_coefficients( the input graph must be weighted in that case). diff --git a/python/pylibcugraph/pylibcugraph/random.pyx b/python/pylibcugraph/pylibcugraph/random.pyx index 4e53d4aaa76..918f9924ea4 100644 --- a/python/pylibcugraph/pylibcugraph/random.pyx +++ b/python/pylibcugraph/pylibcugraph/random.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -72,12 +72,12 @@ cdef class CuGraphRandomState: cdef cugraph_resource_handle_t* c_resource_handle_ptr = \ resource_handle.c_resource_handle_ptr - + cdef cugraph_rng_state_t* new_rng_state_ptr if seed is None: seed = generate_default_seed() - + # reinterpret as unsigned seed &= (2**64 - 1) @@ -85,12 +85,12 @@ cdef class CuGraphRandomState: c_resource_handle_ptr, seed, &new_rng_state_ptr, - &error_ptr + &error_ptr ) assert_success(error_code, error_ptr, "cugraph_rng_state_create") - + self.rng_state_ptr = new_rng_state_ptr - + def __dealloc__(self): """ Destroys this CuGraphRandomState instance. 
Properly calls diff --git a/python/pylibcugraph/pylibcugraph/replicate_edgelist.pyx b/python/pylibcugraph/pylibcugraph/replicate_edgelist.pyx index 3763d4bc69d..8d4964a314f 100644 --- a/python/pylibcugraph/pylibcugraph/replicate_edgelist.pyx +++ b/python/pylibcugraph/pylibcugraph/replicate_edgelist.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -62,13 +62,13 @@ def replicate_edgelist(ResourceHandle resource_handle, resource_handle : ResourceHandle Handle to the underlying device resources needed for referencing data and running algorithms. - + src_array : device array type, optional Device array containing the vertex identifiers of the source of each directed edge. The order of the array corresponds to the ordering of the dst_array, where the ith item in src_array and the ith item in dst_array define the ith edge of the graph. - + dst_array : device array type, optional Device array containing the vertex identifiers of the destination of each directed edge. The order of the array corresponds to the ordering @@ -80,13 +80,13 @@ def replicate_edgelist(ResourceHandle resource_handle, order of the array corresponds to the ordering of the src_array and dst_array arrays, where the ith item in weight_array is the weight value of the ith edge of the graph. - + edge_id_array : device array type, optional Device array containing the edge id values of each directed edge. The order of the array corresponds to the ordering of the src_array and dst_array arrays, where the ith item in edge_id_array is the id value of the ith edge of the graph. - + edge_type_id_array : device array type, optional Device array containing the edge type id values of each directed edge. 
The order of the array corresponds to the ordering of the src_array and @@ -112,17 +112,17 @@ def replicate_edgelist(ResourceHandle resource_handle, cdef cugraph_type_erased_device_array_view_t* srcs_view_ptr = \ create_cugraph_type_erased_device_array_view_from_py_obj(src_array) - + cdef cugraph_type_erased_device_array_view_t* dsts_view_ptr = \ create_cugraph_type_erased_device_array_view_from_py_obj(dst_array) - + cdef cugraph_type_erased_device_array_view_t* weights_view_ptr = \ create_cugraph_type_erased_device_array_view_from_py_obj(weight_array) cdef cugraph_type_erased_device_array_view_t* edge_ids_view_ptr = \ create_cugraph_type_erased_device_array_view_from_py_obj(edge_id_array) - + cdef cugraph_type_erased_device_array_view_t* edge_type_ids_view_ptr = \ create_cugraph_type_erased_device_array_view_from_py_obj(edge_type_id_array) @@ -172,11 +172,11 @@ def replicate_edgelist(ResourceHandle resource_handle, if weight_array is not None: cupy_edge_weights = copy_to_cupy_array( c_resource_handle_ptr, edge_weights_ptr) - + if edge_id_array is not None: cupy_edge_ids = copy_to_cupy_array( c_resource_handle_ptr, edge_ids_ptr) - + if edge_type_id_array is not None: cupy_edge_type_ids = copy_to_cupy_array( c_resource_handle_ptr, edge_type_ids_ptr) diff --git a/python/pylibcugraph/pylibcugraph/select_random_vertices.pyx b/python/pylibcugraph/pylibcugraph/select_random_vertices.pyx index 7964f101058..9aa8ff4c01e 100644 --- a/python/pylibcugraph/pylibcugraph/select_random_vertices.pyx +++ b/python/pylibcugraph/pylibcugraph/select_random_vertices.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -76,12 +76,12 @@ def select_random_vertices(ResourceHandle resource_handle, graph : SGGraph or MGGraph The input graph, for either Single or Multi-GPU operations. - + random_state : int , optional Random state to use when generating samples. Optional argument, defaults to a hash of process id, time, and hostname. (See pylibcugraph.random.CuGraphRandomState) - + num_vertices : size_t , optional Number of vertices to sample. Optional argument, defaults to the total number of vertices. @@ -116,7 +116,7 @@ def select_random_vertices(ResourceHandle resource_handle, vertices_view_ptr = \ cugraph_type_erased_device_array_view( vertices_ptr) - - cupy_vertices = copy_to_cupy_array(c_resource_handle_ptr, vertices_view_ptr) + + cupy_vertices = copy_to_cupy_array(c_resource_handle_ptr, vertices_view_ptr) return cupy_vertices diff --git a/python/pylibcugraph/pylibcugraph/sorensen_coefficients.pyx b/python/pylibcugraph/pylibcugraph/sorensen_coefficients.pyx index 983a635012f..39814b8ad88 100644 --- a/python/pylibcugraph/pylibcugraph/sorensen_coefficients.pyx +++ b/python/pylibcugraph/pylibcugraph/sorensen_coefficients.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -65,7 +65,7 @@ def sorensen_coefficients(ResourceHandle resource_handle, bool_t do_expensive_check): """ Compute the Sorensen coefficients for the specified vertex_pairs. - + Note that Sorensen similarity must run on a symmetric graph. Parameters @@ -76,13 +76,13 @@ def sorensen_coefficients(ResourceHandle resource_handle, graph : SGGraph or MGGraph The input graph, for either Single or Multi-GPU operations. - + first : Source of the vertex pair. - + second : Destination of the vertex pair. 
- + use_weight : bool, optional If set to True, the compute weighted jaccard_coefficients( the input graph must be weighted in that case). diff --git a/python/pylibcugraph/pylibcugraph/spectral_modularity_maximization.pyx b/python/pylibcugraph/pylibcugraph/spectral_modularity_maximization.pyx index fa01714744d..a2c93e025a8 100644 --- a/python/pylibcugraph/pylibcugraph/spectral_modularity_maximization.pyx +++ b/python/pylibcugraph/pylibcugraph/spectral_modularity_maximization.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -90,7 +90,7 @@ def spectral_modularity_maximization(ResourceHandle resource_handle, kmean_max_iter: size_t Specifies the maximum number of iterations for the k-means solver. - + do_expensive_check : bool_t If True, performs more extensive tests on the inputs to ensure validitity, at the expense of increased run time. diff --git a/python/pylibcugraph/pylibcugraph/testing/type_utils.pyx b/python/pylibcugraph/pylibcugraph/testing/type_utils.pyx index 6defb4c6b43..a79893e8e9f 100644 --- a/python/pylibcugraph/pylibcugraph/testing/type_utils.pyx +++ b/python/pylibcugraph/pylibcugraph/testing/type_utils.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -90,7 +90,7 @@ def create_sampling_result(ResourceHandle resource_handle, device_sources.__cuda_array_interface__["data"][0] cdef uintptr_t cai_dsts_ptr = \ device_destinations.__cuda_array_interface__["data"][0] - + cdef uintptr_t cai_weights_ptr if device_weights is not None: cai_weights_ptr = device_weights.__cuda_array_interface__['data'][0] @@ -127,7 +127,7 @@ def create_sampling_result(ResourceHandle resource_handle, len(device_weights), get_c_type_from_numpy_type(device_weights.dtype) ) - ) + ) cdef cugraph_type_erased_device_array_view_t* c_edge_id_ptr = NULL if device_weights is not None: c_edge_id_ptr = ( @@ -136,7 +136,7 @@ def create_sampling_result(ResourceHandle resource_handle, len(device_edge_id), get_c_type_from_numpy_type(device_edge_id.dtype) ) - ) + ) cdef cugraph_type_erased_device_array_view_t* c_edge_type_ptr = NULL if device_weights is not None: c_edge_type_ptr = ( @@ -145,7 +145,7 @@ def create_sampling_result(ResourceHandle resource_handle, len(device_edge_type), get_c_type_from_numpy_type(device_edge_type.dtype) ) - ) + ) cdef cugraph_type_erased_device_array_view_t* c_hop_ptr = NULL if device_weights is not None: @@ -155,7 +155,7 @@ def create_sampling_result(ResourceHandle resource_handle, len(device_hop), get_c_type_from_numpy_type(device_hop.dtype) ) - ) + ) cdef cugraph_type_erased_device_array_view_t* c_label_ptr = NULL if device_weights is not None: @@ -165,7 +165,7 @@ def create_sampling_result(ResourceHandle resource_handle, len(device_batch_label), get_c_type_from_numpy_type(device_batch_label.dtype) ) - ) + ) error_code = cugraph_test_uniform_neighborhood_sample_result_create( diff --git a/python/pylibcugraph/pylibcugraph/triangle_count.pyx b/python/pylibcugraph/pylibcugraph/triangle_count.pyx index fd86181b581..0cbe9be7f19 100644 --- a/python/pylibcugraph/pylibcugraph/triangle_count.pyx +++ b/python/pylibcugraph/pylibcugraph/triangle_count.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 
2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -73,11 +73,11 @@ def triangle_count(ResourceHandle resource_handle, start_list: device array type Device array containing the list of vertices for triangle counting. If 'None' the entire set of vertices in the graph is processed - + do_expensive_check: bool If True, performs more extensive tests on the inputs to ensure validitity, at the expense of increased run time. - + Returns ------- A tuple of device arrays, where the first item in the tuple is a device @@ -111,7 +111,7 @@ def triangle_count(ResourceHandle resource_handle, get_c_type_from_numpy_type(start_list.dtype)) else: start_ptr = NULL - + error_code = cugraph_triangle_count(c_resource_handle_ptr, c_graph_ptr, start_ptr, @@ -132,5 +132,5 @@ def triangle_count(ResourceHandle resource_handle, if start_list is not None: cugraph_type_erased_device_array_view_free(start_ptr) - + return (cupy_vertices, cupy_counts) diff --git a/python/pylibcugraph/pylibcugraph/two_hop_neighbors.pyx b/python/pylibcugraph/pylibcugraph/two_hop_neighbors.pyx index 3989e45d48f..4b1a5b74a3f 100644 --- a/python/pylibcugraph/pylibcugraph/two_hop_neighbors.pyx +++ b/python/pylibcugraph/pylibcugraph/two_hop_neighbors.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -66,7 +66,7 @@ def get_two_hop_neighbors(ResourceHandle resource_handle, graph : SGGraph or MGGraph The input graph, for either Single or Multi-GPU operations. 
- + start_vertices : Optional array of starting vertices If None use all, if specified compute two-hop neighbors for these starting vertices @@ -102,16 +102,16 @@ def get_two_hop_neighbors(ResourceHandle resource_handle, cdef cugraph_type_erased_device_array_view_t* first_ptr = \ cugraph_vertex_pairs_get_first(result_ptr) - + cdef cugraph_type_erased_device_array_view_t* second_ptr = \ cugraph_vertex_pairs_get_second(result_ptr) - + cupy_first = copy_to_cupy_array(c_resource_handle_ptr, first_ptr) cupy_second = copy_to_cupy_array(c_resource_handle_ptr, second_ptr) # Free all pointers cugraph_vertex_pairs_free(result_ptr) if start_vertices is not None: - cugraph_type_erased_device_array_view_free(start_vertices_view_ptr) + cugraph_type_erased_device_array_view_free(start_vertices_view_ptr) return cupy_first, cupy_second diff --git a/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx index ce6493c38f5..b4145a80095 100644 --- a/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -124,25 +124,25 @@ def uniform_neighbor_sample(ResourceHandle resource_handle, do_expensive_check: bool If True, performs more extensive tests on the inputs to ensure validitity, at the expense of increased run time. - + with_edge_properties: bool If True, returns the edge properties of each edges along with the edges themselves. Will result in an error if the provided graph does not have edge properties. - + batch_id_list: list[int32] (Optional) List of int32 batch ids that is returned with each edge. Optional argument, defaults to NULL, returning nothing. 
- + label_list: list[int32] (Optional) List of unique int32 batch ids. Required if also passing the label_to_output_comm_rank flag. Default to NULL (does nothing) - + label_to_output_comm_rank: list[int32] (Optional) Maps the unique batch ids in label_list to the rank of the worker that should hold results for that batch id. Defaults to NULL (does nothing) - + prior_sources_behavior: str (Optional) Options are "carryover", and "exclude". Default will leave the source list as-is. @@ -150,7 +150,7 @@ def uniform_neighbor_sample(ResourceHandle resource_handle, current hop. Exclude will exclude sources from previous hops from reappearing as sources in future hops. - + deduplicate_sources: bool (Optional) If True, will deduplicate the source list before sampling. Defaults to False. @@ -159,11 +159,11 @@ def uniform_neighbor_sample(ResourceHandle resource_handle, If True, will renumber the sources and destinations on a per-batch basis and return the renumber map and batch offsets in additional to the standard returns. - + compression: str (Optional) Options: COO (default), CSR, CSC, DCSR, DCSR Sets the compression format for the returned samples. - + compress_per_hop: bool (Optional) If False (default), will create a compressed edgelist for the entire batch. @@ -174,7 +174,7 @@ def uniform_neighbor_sample(ResourceHandle resource_handle, Random state to use when generating samples. Optional argument, defaults to a hash of process id, time, and hostname. (See pylibcugraph.random.CuGraphRandomState) - + return_dict: bool (Optional) Whether to return a dictionary instead of a tuple. Optional argument, defaults to False, returning a tuple. @@ -188,11 +188,11 @@ def uniform_neighbor_sample(ResourceHandle resource_handle, walk respectively, the third item in the tuple is a device array containing the start labels, and the fourth item in the tuple is a device array containing the indices for reconstructing paths. 
- + If renumber was set to True, then the fifth item in the tuple is a device array containing the renumber map, and the sixth item in the tuple is a device array containing the renumber map offsets (which delineate where - the renumber map for each batch starts). + the renumber map for each batch starts). """ cdef cugraph_resource_handle_t* c_resource_handle_ptr = ( @@ -218,22 +218,22 @@ def uniform_neighbor_sample(ResourceHandle resource_handle, cdef uintptr_t cai_start_ptr = \ start_list.__cuda_array_interface__["data"][0] - + cdef uintptr_t cai_batch_id_ptr if batch_id_list is not None: cai_batch_id_ptr = \ batch_id_list.__cuda_array_interface__['data'][0] - + cdef uintptr_t cai_label_list_ptr if label_list is not None: cai_label_list_ptr = \ label_list.__cuda_array_interface__['data'][0] - + cdef uintptr_t cai_label_to_output_comm_rank_ptr if label_to_output_comm_rank is not None: cai_label_to_output_comm_rank_ptr = \ label_to_output_comm_rank.__cuda_array_interface__['data'][0] - + cdef uintptr_t ai_fan_out_ptr = \ h_fan_out.__array_interface__["data"][0] @@ -251,7 +251,7 @@ def uniform_neighbor_sample(ResourceHandle resource_handle, len(batch_id_list), get_c_type_from_numpy_type(batch_id_list.dtype) ) - + cdef cugraph_type_erased_device_array_view_t* label_list_ptr = NULL if label_list is not None: label_list_ptr = \ @@ -260,7 +260,7 @@ def uniform_neighbor_sample(ResourceHandle resource_handle, len(label_list), get_c_type_from_numpy_type(label_list.dtype) ) - + cdef cugraph_type_erased_device_array_view_t* label_to_output_comm_rank_ptr = NULL if label_to_output_comm_rank is not None: label_to_output_comm_rank_ptr = \ @@ -276,7 +276,7 @@ def uniform_neighbor_sample(ResourceHandle resource_handle, len(h_fan_out), get_c_type_from_numpy_type(h_fan_out.dtype)) - + cg_rng_state = CuGraphRandomState(resource_handle, random_state) cdef cugraph_rng_state_t* rng_state_ptr = \ @@ -294,7 +294,7 @@ def uniform_neighbor_sample(ResourceHandle resource_handle, f'Invalid 
option {prior_sources_behavior}' ' for prior sources behavior' ) - + cdef cugraph_compression_type_t compression_behavior_e if compression is None or compression == 'COO': compression_behavior_e = cugraph_compression_type_t.COO diff --git a/python/pylibcugraph/pylibcugraph/weakly_connected_components.pyx b/python/pylibcugraph/pylibcugraph/weakly_connected_components.pyx index 240c374353d..c9f62be389d 100644 --- a/python/pylibcugraph/pylibcugraph/weakly_connected_components.pyx +++ b/python/pylibcugraph/pylibcugraph/weakly_connected_components.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -61,8 +61,8 @@ def _ensure_args(graph, offsets, indices, weights): else: invalid_input = [i for p in [offsets, indices] if p is None] input_type = "csr_arrays" - - + + if len(invalid_input) != 0: raise TypeError("Invalid input combination: Must set either 'graph' or " "a combination of 'offsets', 'indices' and 'weights', not both") @@ -71,7 +71,7 @@ def _ensure_args(graph, offsets, indices, weights): assert_CAI_type(offsets, "offsets") assert_CAI_type(indices, "indices") assert_CAI_type(weights, "weights", True) - + return input_type @@ -95,7 +95,7 @@ def weakly_connected_components(ResourceHandle resource_handle, graph : SGGraph or MGGraph The input graph. - + offsets : object supporting a __cuda_array_interface__ interface Array containing the offsets values of a Compressed Sparse Row matrix that represents the graph. @@ -133,7 +133,7 @@ def weakly_connected_components(ResourceHandle resource_handle, ... store_transposed=False, renumber=True, do_expensive_check=False) >>> (vertices, labels) = weakly_connected_components( ... 
resource_handle, G, None, None, None, None, False) - + >>> vertices [0, 1, 2] >>> labels diff --git a/readme_pages/PRTAGS.md b/readme_pages/PRTAGS.md index 4fa02ff9590..6b16d0740dd 100644 --- a/readme_pages/PRTAGS.md +++ b/readme_pages/PRTAGS.md @@ -1,5 +1,5 @@ # Pull Request Tags -If you look at the list of current [Pull Request](https://github.com/rapidsai/cugraph/pulls) you will notice a set of bracketed tags in the subject line. Those tags help developers focus attention and know what is being asked. +If you look at the list of current [Pull Request](https://github.com/rapidsai/cugraph/pulls) you will notice a set of bracketed tags in the subject line. Those tags help developers focus attention and know what is being asked. PR = Pull Request @@ -11,4 +11,4 @@ PR = Pull Request | REVIEW | The code is ready for a full code review. Only code that has passed a code review is merged into the baseline | -Code must pass CI before it is merged \ No newline at end of file +Code must pass CI before it is merged diff --git a/readme_pages/TRANSITIONGUIDE.md b/readme_pages/TRANSITIONGUIDE.md index 73aa487aca1..f3c9dad207d 100644 --- a/readme_pages/TRANSITIONGUIDE.md +++ b/readme_pages/TRANSITIONGUIDE.md @@ -3,18 +3,18 @@ ## Python API ### Loading an edge list -Renumbering is now enabled by default in `from_cudf_edgelist`. -The renumbering feature allows us to hide the fact that we need vertices to be integers starting at 0. The auto-renumbering feature converts the data into the proper data type required by the underlying implementation. All algorithms accepting vertex identifiers (like the souce vertex for SSSP) now automatically accept user's notation too. On output, it will transparently un-renumber results, basically convert the internal IDs back. +Renumbering is now enabled by default in `from_cudf_edgelist`. +The renumbering feature allows us to hide the fact that we need vertices to be integers starting at 0. 
The auto-renumbering feature converts the data into the proper data type required by the underlying implementation. All algorithms accepting vertex identifiers (like the souce vertex for SSSP) now automatically accept user's notation too. On output, it will transparently un-renumber results, basically convert the internal IDs back. ## C++ API -Pagerank, BFS, and SSSP have dropped the `gdf_column` dependency in favor of basic types and templates +Pagerank, BFS, and SSSP have dropped the `gdf_column` dependency in favor of basic types and templates -Example : +Example : ``` -// 0.11 API -cugraph::pagerank(cugraph::Graph, gdf_column *pagerank, ...) -// 0.12 API -cugraph::pagerank(cugraph::Graph, float *pagerank ...) +// 0.11 API +cugraph::pagerank(cugraph::Graph, gdf_column *pagerank, ...) +// 0.12 API +cugraph::pagerank(cugraph::Graph, float *pagerank ...) ``` # 0.11 @@ -26,21 +26,21 @@ This release introduces new concepts in the API and improves user experience thr ### New graph types The Python API now has `Graph` (undirected) and `DiGraph` (directed) types. This is closer to NetworkX's API. -In the past, directed graphs were stored using the `Graph` type. +In the past, directed graphs were stored using the `Graph` type. Starting in 0.11, `DiGraph` should be used for directed graphs instead. `Graph` only refers to undirected graphs now. The `Multi(Di)Graph` types were added and more support for this new structure will be added in the next releases (more details in issue #604). ### Undirected graphs -cuGraph currently automatically "symmetrize" undirected inputs: each undirected edge (u,v) is stored as two directed edges (u,v) and (v,u). +cuGraph currently automatically "symmetrize" undirected inputs: each undirected edge (u,v) is stored as two directed edges (u,v) and (v,u). -When viewing the graph or requesting the number of edges, cuGraph will currently return this symmetrized view. 
-This is an implementation detail that will be hidden to the user in 0.12 (more details in issue #603). +When viewing the graph or requesting the number of edges, cuGraph will currently return this symmetrized view. +This is an implementation detail that will be hidden to the user in 0.12 (more details in issue #603). ### Loading an edge list Users are encouraged to use `from_cudf_edgelist` instead of `add_edge_list`. -This new API supports cuDF DataFrame. Users can now ask for an automatic renumbering of the edge list at the time it is loaded. +This new API supports cuDF DataFrame. Users can now ask for an automatic renumbering of the edge list at the time it is loaded. In this case, all analytics outputs are automatically un-renumbered before being returned. ## C++ API @@ -54,10 +54,10 @@ This release is the first step toward converting the former C-like API into a C+ Example : ```c -// < 0.11 API -gdf_error err = gdf_pagerank(, ...) -// >= 0.11 API -cugraph::pagerank(, ...) +// < 0.11 API +gdf_error err = gdf_pagerank(, ...) +// >= 0.11 API +cugraph::pagerank(, ...) ``` The C++ API provides functions that efficiently convert between data formats and access to the efficient CUDA algorithms. In 0.11, all automatic conversions and decision making were removed from the C++ layer. diff --git a/readme_pages/cugraph_dgl.md b/readme_pages/cugraph_dgl.md index 3c6ddd4026b..7b19787f4c6 100644 --- a/readme_pages/cugraph_dgl.md +++ b/readme_pages/cugraph_dgl.md @@ -3,7 +3,7 @@ [RAPIDS](https://rapids.ai) cugraph_dgl enables the ability to use cugraph Property Graphs with DGL. This cugraph backend allows DGL users access to a collection of GPU-accelerated algorithms for graph analytics, such as sampling, centrality computation, and community detection. 
-The goal of `cugraph_dgl` is to enable Multi-Node Multi-GPU cugraph accelerated graphs to help train large-scale Graph Neural Networks(GNN) on DGL by providing a duck-typed version of the [DGLGraph](https://docs.dgl.ai/api/python/dgl.DGLGraph.html#dgl.DGLGraph) which uses cugraph for storing graph structure and node/edge feature data. +The goal of `cugraph_dgl` is to enable Multi-Node Multi-GPU cugraph accelerated graphs to help train large-scale Graph Neural Networks(GNN) on DGL by providing a duck-typed version of the [DGLGraph](https://docs.dgl.ai/api/python/dgl.DGLGraph.html#dgl.DGLGraph) which uses cugraph for storing graph structure and node/edge feature data. ## Usage ```diff @@ -15,14 +15,13 @@ sampler = dgl.dataloading.NeighborSampler( [15, 10, 5], prefetch_node_feats=['feat'], prefetch_labels=['label']) train_dataloader = dgl.dataloading.DataLoader( -- dgl_g, +- dgl_g, + cugraph_g, -train_idx, -sampler, -device=device, +train_idx, +sampler, +device=device, batch_size=1024, shuffle=True, -drop_last=False, +drop_last=False, num_workers=0) ``` - diff --git a/readme_pages/cugraph_ops.md b/readme_pages/cugraph_ops.md index 87b0051a815..7bd4ac55185 100644 --- a/readme_pages/cugraph_ops.md +++ b/readme_pages/cugraph_ops.md @@ -7,7 +7,7 @@ CuGraphOps Cugraph-ops is a closed-source library that is composed of highly optimized and performant primitives associated with GNNs and related graph -operations, such as training, sampling and inference. +operations, such as training, sampling and inference. This is how cuGraphOps fits into the cuGraph ecosystem diff --git a/readme_pages/cugraph_python.md b/readme_pages/cugraph_python.md index 164c1212ed8..69d70f69fad 100644 --- a/readme_pages/cugraph_python.md +++ b/readme_pages/cugraph_python.md @@ -1,13 +1,13 @@ # cuGraph – Python -cuGraph is a Python package that encapsulate and hides the complexity of the lower layer C/CUDA code. 
Additionally, the software is focused on providing an easy and familiar API +cuGraph is a Python package that encapsulate and hides the complexity of the lower layer C/CUDA code. Additionally, the software is focused on providing an easy and familiar API ## cuGraph Notice -Vertex IDs are expected to be contiguous integers starting from 0. If your data doesn't match that restriction, we have a solution. cuGraph provides the renumber function, which is by default automatically called when data is added to a graph. Input vertex IDs for the renumber function can be any type, can be non-contiguous, can be multiple columns, and can start from an arbitrary number. The renumber function maps the provided input vertex IDs to either 32- or 64-bit contiguous integers starting from 0. +Vertex IDs are expected to be contiguous integers starting from 0. If your data doesn't match that restriction, we have a solution. cuGraph provides the renumber function, which is by default automatically called when data is added to a graph. Input vertex IDs for the renumber function can be any type, can be non-contiguous, can be multiple columns, and can start from an arbitrary number. The renumber function maps the provided input vertex IDs to either 32- or 64-bit contiguous integers starting from 0. Additionally, when using the auto-renumbering feature, vertices are automatically un-renumbered in results. @@ -21,4 +21,4 @@ The amount of memory required is dependent on the graph structure and the analyt | 500 million edges | 32 GB | | 250 million edges | 16 GB | -The use of managed memory for oversubscription can also be used to exceed the above memory limitations. 
See the recent blog on _Tackling Large Graphs with RAPIDS cuGraph and CUDA Unified Memory on GPUs_: https://medium.com/rapids-ai/tackling-large-graphs-with-rapids-cugraph-and-unified-virtual-memory-b5b69a065d4 \ No newline at end of file +The use of managed memory for oversubscription can also be used to exceed the above memory limitations. See the recent blog on _Tackling Large Graphs with RAPIDS cuGraph and CUDA Unified Memory on GPUs_: https://medium.com/rapids-ai/tackling-large-graphs-with-rapids-cugraph-and-unified-virtual-memory-b5b69a065d4 diff --git a/readme_pages/cugraph_service.md b/readme_pages/cugraph_service.md index 9c06cd9f71a..b1a95e62a95 100644 --- a/readme_pages/cugraph_service.md +++ b/readme_pages/cugraph_service.md @@ -4,7 +4,7 @@ The goal of cugraph_service is to wrap a cuGraph cluster and provide a Graph-as- Goals * Separate large graph management and analytic code from application code - * The application, like GNN code, should be isolated from the details of cuGraph graph management, dedicated multi-node/multi-GPU setup, feature storage and retrieval, etc. + * The application, like GNN code, should be isolated from the details of cuGraph graph management, dedicated multi-node/multi-GPU setup, feature storage and retrieval, etc. * Scaling from single GPU (SG), to multi-GPU (MG), to multi-node/multi-GPU (MNMG) should not require changes to the graph integration code * Support multiple concurrent clients/processes/threads sharing one or more graphs @@ -20,7 +20,7 @@ Goals # Picture -One option on a single DGX +One option on a single DGX graph_service_cluster diff --git a/readme_pages/data_types.md b/readme_pages/data_types.md index 37f2ee4daf6..936775742f1 100644 --- a/readme_pages/data_types.md +++ b/readme_pages/data_types.md @@ -1,5 +1,5 @@ # External Data Types -cuGraph Python strives to make getting data into and out of cuGraph simple. 
To that end, the Python interface accepts +cuGraph Python strives to make getting data into and out of cuGraph simple. To that end, the Python interface accepts @@ -15,7 +15,7 @@ cuGraph supports graph creation with Source and Destination being expressed as: cuGraph tries to match the return type based on the input type. So a NetworkX input will return the same data type that NetworkX would have. ## cuDF -The preferred data type is a cuDF object since it is already in the GPU. For loading data from disk into cuDF please see the cuDF documentation. +The preferred data type is a cuDF object since it is already in the GPU. For loading data from disk into cuDF please see the cuDF documentation. __Loading data__ * Graph.from_cudf_adjlist @@ -23,18 +23,18 @@ __Loading data__ __Results__
-Results which are not simple types (ints, floats) are typically cuDF Dataframes. +Results which are not simple types (ints, floats) are typically cuDF Dataframes. ## Pandas -The RAPIDS cuDF library can be thought of as accelerated Pandas +The RAPIDS cuDF library can be thought of as accelerated Pandas ## NetworkX Graph Objects -## +## diff --git a/readme_pages/gnn_support.md b/readme_pages/gnn_support.md index 1c52be1c013..924c2bf62af 100644 --- a/readme_pages/gnn_support.md +++ b/readme_pages/gnn_support.md @@ -27,7 +27,6 @@ An overview of GNN's and how they are used is found in this excellent [blog](htt RAPIDS GNN components improve other industy GNN specific projects. Due to the degree distribution of nodes, memory bottlenecks are the pain point for large scale graphs. To solve this problem, sampling operations form the backbone for Graph Neural Networks (GNN) training. However, current sampling methods provided by other libraries are not optimized enough for the whole process of GNN training. The main limit to performance is moving data between the hosts and devices. In cuGraph, we provide an end-to-end solution from data loading to training all on the GPUs. -CuGraph now supports compatibility with [Deep Graph Library](https://www.dgl.ai/) (DGL) and [PyTorch Geometric](https://pytorch-geometric.readthedocs.io/en/latest/) (PyG) by allowing conversion between a cuGraph object and a DGL or PyG object, making it possible for DGL and PyG users to access efficient data loader and graph operations (such as uniformed sampling) implementations in cuGraph, as well as keep their models unchanged in DGL or PyG. We have considerable speedup compared with the original implementation in DGL and PyG. 
+CuGraph now supports compatibility with [Deep Graph Library](https://www.dgl.ai/) (DGL) and [PyTorch Geometric](https://pytorch-geometric.readthedocs.io/en/latest/) (PyG) by allowing conversion between a cuGraph object and a DGL or PyG object, making it possible for DGL and PyG users to access efficient data loader and graph operations (such as uniformed sampling) implementations in cuGraph, as well as keep their models unchanged in DGL or PyG. We have considerable speedup compared with the original implementation in DGL and PyG. [](https://developer.nvidia.com/blog/optimizing-fraud-detection-in-financial-services-with-graph-neural-networks-and-nvidia-gpus/) - diff --git a/readme_pages/news.md b/readme_pages/news.md index f9ebe36b9dd..fa22fb8c318 100644 --- a/readme_pages/news.md +++ b/readme_pages/news.md @@ -1,13 +1,13 @@ # cuGraph News ### Scaling to 1 Trillion Edges -At GTC Spring '22 we presented results of running cuGraph on the [Selene](https://top500.org/system/179842/) supercomputer using 2,048 GPUs and processing a graph with `1.1 Trillion edges`. Synthetic data created with the RMAT generator found in cuGraph. +At GTC Spring '22 we presented results of running cuGraph on the [Selene](https://top500.org/system/179842/) supercomputer using 2,048 GPUs and processing a graph with `1.1 Trillion edges`. Synthetic data created with the RMAT generator found in cuGraph.
 
cuGraph Scaling


### cuGraph Software Stack -cuGraph has a new multi-layer software stack that allows users and system integrators to access cuGraph at different layers. +cuGraph has a new multi-layer software stack that allows users and system integrators to access cuGraph at different layers.
 
cuGraph Software Stack
-

\ No newline at end of file +

diff --git a/readme_pages/performance/performance.md b/readme_pages/performance/performance.md index 5700c59bba3..159b2334704 100644 --- a/readme_pages/performance/performance.md +++ b/readme_pages/performance/performance.md @@ -4,4 +4,4 @@ alt="RAPIDS" width="500"> We are working on a new nightly benchmarking system that will produce performance numbers. -This is a splash page for where the performance numbers will be posted in early 2023. \ No newline at end of file +This is a splash page for where the performance numbers will be posted in early 2023. diff --git a/readme_pages/property_graph.md b/readme_pages/property_graph.md index 19d6e23f718..bce090c2ad3 100644 --- a/readme_pages/property_graph.md +++ b/readme_pages/property_graph.md @@ -20,8 +20,8 @@ Property Graph enables: This is an example of using the cuGraph Property Graph in a two stage analysis. ``` -import cudf -import cugraph +import cudf +import cugraph from cugraph.experimental import PropertyGraph # Import a built-in dataset @@ -33,22 +33,22 @@ graph = cugraph.Graph(directed=False) G = karate.get_graph(create_using=graph,fetch=True) df = G.edgelist.edgelist_df -pG = PropertyGraph() +pG = PropertyGraph() pG. add_edge_data(df, vertex_col_names=("src", "dst")) -# Run Louvain to get the partition number for each vertex. -# Set resolution accordingly to identify two primary partitions. +# Run Louvain to get the partition number for each vertex. +# Set resolution accordingly to identify two primary partitions. (partition_info, _) = cugraph.louvain(pG.extract_subgraph(create_using=graph), resolution=0.6) -# Add the partition numbers back to the Property Graph as vertex properties +# Add the partition numbers back to the Property Graph as vertex properties pG.add_vertex_data(partition_info, vertex_col_name="vertex") -# Use the partition properties to extract a Graph for each partition. +# Use the partition properties to extract a Graph for each partition. 
G0 = pG.extract_subgraph(selection=pG.select_vertices("partition == 0")) G1 = pG.extract_subgraph(selection=pG. select_vertices("partition == 1")) -# Run pagerank on each graph, print results. -pageranks0 = cugraph.pagerank(G0) -pageranks1 = cugraph.pagerank(G1) +# Run pagerank on each graph, print results. +pageranks0 = cugraph.pagerank(G0) +pageranks1 = cugraph.pagerank(G1) print(pageranks0.sort_values (by="pagerank", ascending=False).head(3)) print(pageranks1.sort_values (by="pagerank", ascending=False).head(3)) ```