diff --git a/build.sh b/build.sh index 74bdb3c6a2f..8dca89aeedd 100755 --- a/build.sh +++ b/build.sh @@ -30,7 +30,7 @@ VALIDARGS=" cpp-mgtests cugraph-pyg cugraph-dgl - cugraph-nx + nx-cugraph docs -v -g @@ -54,7 +54,7 @@ HELP="$0 [ ...] [ ...] pylibcugraph - build the pylibcugraph Python package cugraph-pyg - build the cugraph-pyg Python package cugraph - build the cugraph Python package - cugraph-nx - build the cugraph-nx Python package + nx-cugraph - build the nx-cugraph Python package cugraph-service - build the cugraph-service_client and cugraph-service_server Python package cpp-mgtests - build libcugraph and libcugraph_etl MG tests. Builds MPI communicator, adding MPI as a dependency. cugraph-dgl - build the cugraph-dgl extensions for DGL @@ -209,7 +209,7 @@ if hasArg uninstall; then # removes the latest one and leaves the others installed. build.sh uninstall # can be run multiple times to remove all of them, but that is not obvious. pip uninstall -y pylibcugraph cugraph cugraph-service-client cugraph-service-server \ - cugraph-dgl cugraph-pyg cugraph-nx + cugraph-dgl cugraph-pyg nx-cugraph fi if hasArg clean; then @@ -382,12 +382,12 @@ if hasArg cugraph-dgl; then fi fi -# Build and install the cugraph-nx Python package -if hasArg cugraph-nx; then +# Build and install the nx-cugraph Python package +if hasArg nx-cugraph; then if hasArg --clean; then - cleanPythonDir ${REPODIR}/python/cugraph-nx + cleanPythonDir ${REPODIR}/python/nx-cugraph else - python ${PYTHON_ARGS_FOR_INSTALL} ${REPODIR}/python/cugraph-nx + python ${PYTHON_ARGS_FOR_INSTALL} ${REPODIR}/python/nx-cugraph fi fi diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh index 3fd57f24c40..3fb72cac08b 100755 --- a/ci/build_cpp.sh +++ b/ci/build_cpp.sh @@ -11,6 +11,6 @@ rapids-print-env rapids-logger "Begin cpp build" -rapids-mamba-retry mambabuild conda/recipes/libcugraph +rapids-conda-retry mambabuild conda/recipes/libcugraph rapids-upload-conda-to-s3 cpp diff --git a/ci/build_python.sh 
b/ci/build_python.sh index 595eedf9e46..62eb6c2ccec 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -15,32 +15,32 @@ rapids-logger "Begin py build" # TODO: Remove `--no-test` flags once importing on a CPU # node works correctly -rapids-mamba-retry mambabuild \ +rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ conda/recipes/pylibcugraph -rapids-mamba-retry mambabuild \ +rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ conda/recipes/cugraph -# NOTE: nothing in cugraph-nx is CUDA-specific, but it is built on each CUDA +# NOTE: nothing in nx-cugraph is CUDA-specific, but it is built on each CUDA # platform to ensure it is included in each set of artifacts, since test # scripts only install from one set of artifacts based on the CUDA version used # for the test run. -rapids-mamba-retry mambabuild \ +rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ - conda/recipes/cugraph-nx + conda/recipes/nx-cugraph # NOTE: nothing in the cugraph-service packages are CUDA-specific, but they are # built on each CUDA platform to ensure they are included in each set of # artifacts, since test scripts only install from one set of artifacts based on # the CUDA version used for the test run. -rapids-mamba-retry mambabuild \ +rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ @@ -50,7 +50,7 @@ RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" if [[ ${RAPIDS_CUDA_MAJOR} == "11" ]]; then # Only CUDA 11 is supported right now due to PyTorch requirement. - rapids-mamba-retry mambabuild \ + rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ @@ -60,7 +60,7 @@ if [[ ${RAPIDS_CUDA_MAJOR} == "11" ]]; then conda/recipes/cugraph-pyg # Only CUDA 11 is supported right now due to PyTorch requirement. 
- rapids-mamba-retry mambabuild \ + rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index f9a78b275ae..2c8735079f0 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -61,7 +61,7 @@ sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cugr sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cugraph-service/client/cugraph_service_client/__init__.py sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cugraph-service/server/cugraph_service_server/__init__.py sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/pylibcugraph/pylibcugraph/__init__.py -sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cugraph-nx/cugraph_nx/__init__.py +sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/nx-cugraph/nx_cugraph/__init__.py # Python pyproject.toml updates sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cugraph/pyproject.toml @@ -70,7 +70,7 @@ sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cugraph-pyg sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cugraph-service/client/pyproject.toml sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cugraph-service/server/pyproject.toml sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/pylibcugraph/pyproject.toml -sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cugraph-nx/pyproject.toml +sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/nx-cugraph/pyproject.toml # Wheel testing script sed_runner "s/branch-.*/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_cugraph.sh diff --git a/ci/test_python.sh b/ci/test_python.sh index e650630fa47..14886909fc9 100755 --- a/ci/test_python.sh +++ 
b/ci/test_python.sh @@ -34,7 +34,7 @@ rapids-mamba-retry install \ libcugraph \ pylibcugraph \ cugraph \ - cugraph-nx \ + nx-cugraph \ cugraph-service-server \ cugraph-service-client @@ -93,28 +93,28 @@ pytest \ cugraph/pytest-based/bench_algos.py popd -rapids-logger "pytest cugraph-nx" -pushd python/cugraph-nx/cugraph_nx +rapids-logger "pytest nx-cugraph" +pushd python/nx-cugraph/nx_cugraph pytest \ --capture=no \ --verbose \ --cache-clear \ - --junitxml="${RAPIDS_TESTS_DIR}/junit-cugraph-nx.xml" \ + --junitxml="${RAPIDS_TESTS_DIR}/junit-nx-cugraph.xml" \ --cov-config=../../.coveragerc \ - --cov=cugraph_nx \ - --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cugraph-nx-coverage.xml" \ + --cov=nx_cugraph \ + --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/nx-cugraph-coverage.xml" \ --cov-report=term \ --benchmark-disable \ tests popd -rapids-logger "pytest networkx using cugraph-nx backend" -pushd python/cugraph-nx +rapids-logger "pytest networkx using nx-cugraph backend" +pushd python/nx-cugraph ./run_nx_tests.sh # run_nx_tests.sh outputs coverage data, so check that total coverage is >0.0% -# in case cugraph-nx failed to load but fallback mode allowed the run to pass. +# in case nx-cugraph failed to load but fallback mode allowed the run to pass. 
_coverage=$(coverage report|grep "^TOTAL") -echo "cugraph-nx coverage from networkx tests: $_coverage" +echo "nx-cugraph coverage from networkx tests: $_coverage" echo $_coverage | awk '{ if ($NF == "0.0%") exit 1 }' popd diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 075cf231725..c66890f8ae5 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -5,6 +5,7 @@ channels: - rapidsai-nightly - dask/label/dev - pytorch +- pyg - dglteam/label/cu118 - conda-forge - nvidia diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index eacafbfd6c4..3afb1415572 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -5,6 +5,7 @@ channels: - rapidsai-nightly - dask/label/dev - pytorch +- pyg - dglteam/label/cu118 - conda-forge - nvidia diff --git a/conda/recipes/cugraph-pyg/meta.yaml b/conda/recipes/cugraph-pyg/meta.yaml index 4d3d7c44093..2d7ed2f4cda 100644 --- a/conda/recipes/cugraph-pyg/meta.yaml +++ b/conda/recipes/cugraph-pyg/meta.yaml @@ -33,6 +33,7 @@ requirements: - pytorch >=2.0 - cupy >=12.0.0 - cugraph ={{ version }} + - pylibcugraphops ={{ version }} - pyg >=2.3,<2.4 tests: diff --git a/conda/recipes/cugraph-nx/build.sh b/conda/recipes/nx-cugraph/build.sh similarity index 86% rename from conda/recipes/cugraph-nx/build.sh rename to conda/recipes/nx-cugraph/build.sh index 31ad477a73e..26665c1e76a 100644 --- a/conda/recipes/cugraph-nx/build.sh +++ b/conda/recipes/nx-cugraph/build.sh @@ -4,4 +4,4 @@ # This assumes the script is executed from the root of the repo directory -./build.sh cugraph-nx +./build.sh nx-cugraph diff --git a/conda/recipes/cugraph-nx/meta.yaml b/conda/recipes/nx-cugraph/meta.yaml similarity index 96% rename from conda/recipes/cugraph-nx/meta.yaml rename to conda/recipes/nx-cugraph/meta.yaml 
index d6b12974981..556d72e8548 100644 --- a/conda/recipes/cugraph-nx/meta.yaml +++ b/conda/recipes/nx-cugraph/meta.yaml @@ -6,7 +6,7 @@ {% set date_string = environ['RAPIDS_DATE_STRING'] %} package: - name: cugraph-nx + name: nx-cugraph version: {{ version }} source: @@ -29,7 +29,7 @@ requirements: tests: imports: - - cugraph_nx + - nx_cugraph commands: - pip check requires: diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 29a488e7505..b624ec5c0e0 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -589,6 +589,8 @@ weight_t hungarian(raft::handle_t const& handle, * @param[in] graph input graph object * @param[out] clustering Pointer to device array where the clustering should be stored * @param[in] max_level (optional) maximum number of levels to run (default 100) + * @param[in] threshold (optional) threshold for convergence at each level (default + * 1e-7) * @param[in] resolution (optional) The value of the resolution parameter to use. * Called gamma in the modularity formula, this changes the size * of the communities. Higher resolutions lead to more smaller @@ -607,6 +609,7 @@ std::pair louvain( std::optional> edge_weight_view, vertex_t* clustering, size_t max_level = 100, + weight_t threshold = weight_t{1e-7}, weight_t resolution = weight_t{1}); template @@ -652,6 +655,7 @@ std::pair>, weight_t> louvain( graph_view_t const& graph_view, std::optional> edge_weight_view, size_t max_level = 100, + weight_t threshold = weight_t{1e-7}, weight_t resolution = weight_t{1}); /** diff --git a/cpp/include/cugraph_c/community_algorithms.h b/cpp/include/cugraph_c/community_algorithms.h index fd0e1de9cb4..e938c77cccd 100644 --- a/cpp/include/cugraph_c/community_algorithms.h +++ b/cpp/include/cugraph_c/community_algorithms.h @@ -93,6 +93,7 @@ typedef struct { * @param [in] graph Pointer to graph. 
NOTE: Graph might be modified if the storage * needs to be transposed * @param [in] max_level Maximum level in hierarchy + * @param [in] threshold Threshold parameter, defines convergence at each level of hierarchy * @param [in] resolution Resolution parameter (gamma) in modularity formula. * This changes the size of the communities. Higher resolutions * lead to more smaller communities, lower resolutions lead to @@ -107,6 +108,7 @@ typedef struct { cugraph_error_code_t cugraph_louvain(const cugraph_resource_handle_t* handle, cugraph_graph_t* graph, size_t max_level, + double threshold, double resolution, bool_t do_expensive_check, cugraph_hierarchical_clustering_result_t** result, diff --git a/cpp/src/c_api/louvain.cpp b/cpp/src/c_api/louvain.cpp index ff75cafa031..0e48b29388a 100644 --- a/cpp/src/c_api/louvain.cpp +++ b/cpp/src/c_api/louvain.cpp @@ -36,6 +36,7 @@ struct louvain_functor : public cugraph::c_api::abstract_functor { raft::handle_t const& handle_; cugraph::c_api::cugraph_graph_t* graph_; size_t max_level_; + double threshold_; double resolution_; bool do_expensive_check_; cugraph::c_api::cugraph_hierarchical_clustering_result_t* result_{}; @@ -43,12 +44,14 @@ struct louvain_functor : public cugraph::c_api::abstract_functor { louvain_functor(::cugraph_resource_handle_t const* handle, ::cugraph_graph_t* graph, size_t max_level, + double threshold, double resolution, bool do_expensive_check) : abstract_functor(), handle_(*reinterpret_cast(handle)->handle_), graph_(reinterpret_cast(graph)), max_level_(max_level), + threshold_(threshold), resolution_(resolution), do_expensive_check_(do_expensive_check) { @@ -102,6 +105,7 @@ struct louvain_functor : public cugraph::c_api::abstract_functor { .view()), clusters.data(), max_level_, + static_cast(threshold_), static_cast(resolution_)); rmm::device_uvector vertices(graph_view.local_vertex_partition_range_size(), @@ -121,12 +125,13 @@ struct louvain_functor : public cugraph::c_api::abstract_functor { extern "C" 
cugraph_error_code_t cugraph_louvain(const cugraph_resource_handle_t* handle, cugraph_graph_t* graph, size_t max_level, + double threshold, double resolution, bool_t do_expensive_check, cugraph_hierarchical_clustering_result_t** result, cugraph_error_t** error) { - louvain_functor functor(handle, graph, max_level, resolution, do_expensive_check); + louvain_functor functor(handle, graph, max_level, threshold, resolution, do_expensive_check); return cugraph::c_api::run_algorithm(graph, functor, result, error); } diff --git a/cpp/src/community/louvain_impl.cuh b/cpp/src/community/louvain_impl.cuh index 167de36dd13..7777921a091 100644 --- a/cpp/src/community/louvain_impl.cuh +++ b/cpp/src/community/louvain_impl.cuh @@ -47,6 +47,7 @@ std::pair>, weight_t> louvain( graph_view_t const& graph_view, std::optional> edge_weight_view, size_t max_level, + weight_t threshold, weight_t resolution) { using graph_t = cugraph::graph_t; @@ -169,7 +170,7 @@ std::pair>, weight_t> louvain( // during each iteration of the loop bool up_down = true; - while (new_Q > (cur_Q + 0.0001)) { + while (new_Q > (cur_Q + threshold)) { cur_Q = new_Q; next_clusters_v = detail::update_clustering_by_delta_modularity(handle, @@ -291,12 +292,13 @@ std::pair>, weight_t> louvain( graph_view_t const& graph_view, std::optional> edge_weight_view, size_t max_level, + weight_t threshold, weight_t resolution) { CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); CUGRAPH_EXPECTS(edge_weight_view.has_value(), "Graph must be weighted"); - return detail::louvain(handle, graph_view, edge_weight_view, max_level, resolution); + return detail::louvain(handle, graph_view, edge_weight_view, max_level, threshold, resolution); } template @@ -317,6 +319,7 @@ std::pair louvain( std::optional> edge_weight_view, vertex_t* clustering, size_t max_level, + weight_t threshold, weight_t resolution) { CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); @@ -328,7 +331,7 @@ std::pair louvain( weight_t modularity; 
std::tie(dendrogram, modularity) = - detail::louvain(handle, graph_view, edge_weight_view, max_level, resolution); + detail::louvain(handle, graph_view, edge_weight_view, max_level, threshold, resolution); detail::flatten_dendrogram(handle, graph_view, *dendrogram, clustering); diff --git a/cpp/src/community/louvain_mg.cu b/cpp/src/community/louvain_mg.cu index d6d266df273..0be32ed049f 100644 --- a/cpp/src/community/louvain_mg.cu +++ b/cpp/src/community/louvain_mg.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,36 +25,42 @@ template std::pair>, float> louvain( graph_view_t const&, std::optional>, size_t, + float, float); template std::pair>, float> louvain( raft::handle_t const&, graph_view_t const&, std::optional>, size_t, + float, float); template std::pair>, float> louvain( raft::handle_t const&, graph_view_t const&, std::optional>, size_t, + float, float); template std::pair>, double> louvain( raft::handle_t const&, graph_view_t const&, std::optional>, size_t, + double, double); template std::pair>, double> louvain( raft::handle_t const&, graph_view_t const&, std::optional>, size_t, + double, double); template std::pair>, double> louvain( raft::handle_t const&, graph_view_t const&, std::optional>, size_t, + double, double); template std::pair louvain( @@ -63,6 +69,7 @@ template std::pair louvain( std::optional>, int32_t*, size_t, + float, float); template std::pair louvain( raft::handle_t const&, @@ -70,6 +77,7 @@ template std::pair louvain( std::optional>, int32_t*, size_t, + double, double); template std::pair louvain( raft::handle_t const&, @@ -77,6 +85,7 @@ template std::pair louvain( std::optional>, int32_t*, size_t, + float, float); template std::pair louvain( raft::handle_t const&, @@ -84,6 +93,7 @@ template std::pair louvain( std::optional>, 
int32_t*, size_t, + double, double); template std::pair louvain( raft::handle_t const&, @@ -91,6 +101,7 @@ template std::pair louvain( std::optional>, int64_t*, size_t, + float, float); template std::pair louvain( raft::handle_t const&, @@ -98,6 +109,7 @@ template std::pair louvain( std::optional>, int64_t*, size_t, + double, double); } // namespace cugraph diff --git a/cpp/src/community/louvain_sg.cu b/cpp/src/community/louvain_sg.cu index 4e26aa1cf18..3fc0ffab928 100644 --- a/cpp/src/community/louvain_sg.cu +++ b/cpp/src/community/louvain_sg.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,36 +25,42 @@ template std::pair>, float> louvain( graph_view_t const&, std::optional>, size_t, + float, float); template std::pair>, float> louvain( raft::handle_t const&, graph_view_t const&, std::optional>, size_t, + float, float); template std::pair>, float> louvain( raft::handle_t const&, graph_view_t const&, std::optional>, size_t, + float, float); template std::pair>, double> louvain( raft::handle_t const&, graph_view_t const&, std::optional>, size_t, + double, double); template std::pair>, double> louvain( raft::handle_t const&, graph_view_t const&, std::optional>, size_t, + double, double); template std::pair>, double> louvain( raft::handle_t const&, graph_view_t const&, std::optional>, size_t, + double, double); template std::pair louvain( @@ -63,6 +69,7 @@ template std::pair louvain( std::optional>, int32_t*, size_t, + float, float); template std::pair louvain( raft::handle_t const&, @@ -70,6 +77,7 @@ template std::pair louvain( std::optional>, int32_t*, size_t, + double, double); template std::pair louvain( raft::handle_t const&, @@ -77,6 +85,7 @@ template std::pair louvain( std::optional>, int32_t*, size_t, + float, float); template std::pair louvain( 
raft::handle_t const&, @@ -84,6 +93,7 @@ template std::pair louvain( std::optional>, int32_t*, size_t, + double, double); template std::pair louvain( raft::handle_t const&, @@ -91,6 +101,7 @@ template std::pair louvain( std::optional>, int64_t*, size_t, + float, float); template std::pair louvain( raft::handle_t const&, @@ -98,6 +109,7 @@ template std::pair louvain( std::optional>, int64_t*, size_t, + double, double); } // namespace cugraph diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index f4c4745b14c..2f30faebb3e 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -1023,7 +1023,7 @@ nbr_intersection(raft::handle_t const& handle, (*major_nbr_offsets).begin() + 1); } - std::tie(*major_nbr_indices, std::ignore) = shuffle_values( + std::tie(major_nbr_indices, std::ignore) = shuffle_values( major_comm, local_nbrs_for_rx_majors.begin(), local_nbr_counts, handle.get_stream()); if constexpr (!std::is_same_v) { diff --git a/cpp/tests/c_api/louvain_test.c b/cpp/tests/c_api/louvain_test.c index f3813b5a1ac..e9ac5c9ff06 100644 --- a/cpp/tests/c_api/louvain_test.c +++ b/cpp/tests/c_api/louvain_test.c @@ -33,6 +33,7 @@ int generic_louvain_test(vertex_t* h_src, size_t num_vertices, size_t num_edges, size_t max_level, + double threshold, double resolution, bool_t store_transposed) { @@ -60,7 +61,7 @@ int generic_louvain_test(vertex_t* h_src, TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); ret_code = - cugraph_louvain(p_handle, p_graph, max_level, resolution, FALSE, &p_result, &ret_error); + cugraph_louvain(p_handle, p_graph, max_level, threshold, resolution, FALSE, &p_result, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, "cugraph_louvain failed."); @@ -108,6 +109,7 @@ int test_louvain() size_t num_edges = 16; size_t num_vertices = 6; 
size_t max_level = 10; + weight_t threshold = 1e-7; weight_t resolution = 1.0; vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; @@ -126,6 +128,7 @@ int test_louvain() num_vertices, num_edges, max_level, + threshold, resolution, FALSE); } @@ -135,6 +138,7 @@ int test_louvain_no_weight() size_t num_edges = 16; size_t num_vertices = 6; size_t max_level = 10; + weight_t threshold = 1e-7; weight_t resolution = 1.0; vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; @@ -151,6 +155,7 @@ int test_louvain_no_weight() num_vertices, num_edges, max_level, + threshold, resolution, FALSE); } diff --git a/cpp/tests/c_api/mg_louvain_test.c b/cpp/tests/c_api/mg_louvain_test.c index d4c10d49891..2465709c03c 100644 --- a/cpp/tests/c_api/mg_louvain_test.c +++ b/cpp/tests/c_api/mg_louvain_test.c @@ -33,6 +33,7 @@ int generic_louvain_test(const cugraph_resource_handle_t* p_handle, size_t num_vertices, size_t num_edges, size_t max_level, + double threshold, double resolution, bool_t store_transposed) { @@ -51,7 +52,7 @@ int generic_louvain_test(const cugraph_resource_handle_t* p_handle, TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); ret_code = - cugraph_louvain(p_handle, p_graph, max_level, resolution, FALSE, &p_result, &ret_error); + cugraph_louvain(p_handle, p_graph, max_level, threshold, resolution, FALSE, &p_result, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, "cugraph_louvain failed."); @@ -106,6 +107,7 @@ int test_louvain(const cugraph_resource_handle_t* handle) size_t num_edges = 8; size_t num_vertices = 6; size_t max_level = 10; + weight_t threshold = 1e-7; weight_t resolution = 1.0; vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; @@ -116,7 +118,7 @@ int test_louvain(const cugraph_resource_handle_t* handle) // Louvain wants store_transposed = FALSE return generic_louvain_test( - 
handle, h_src, h_dst, h_wgt, h_result, num_vertices, num_edges, max_level, resolution, FALSE); + handle, h_src, h_dst, h_wgt, h_result, num_vertices, num_edges, max_level, threshold, resolution, FALSE); } /******************************************************************************/ diff --git a/cpp/tests/community/louvain_test.cpp b/cpp/tests/community/louvain_test.cpp index 4792042365b..1e1fb6d4c33 100644 --- a/cpp/tests/community/louvain_test.cpp +++ b/cpp/tests/community/louvain_test.cpp @@ -30,8 +30,9 @@ #include struct Louvain_Usecase { - size_t max_level_{100}; - double resolution_{1}; + std::optional max_level_{std::nullopt}; + std::optional threshold_{std::nullopt}; + std::optional resolution_{std::nullopt}; bool check_correctness_{false}; int expected_level_{0}; float expected_modularity_{0}; @@ -54,6 +55,12 @@ class Tests_Louvain { auto [louvain_usecase, input_usecase] = param; + // Legacy implementation does not support resolution parameter, + // defaulting it to 1. If the test case is not resolution + // 1 then skip it. 
+ if (louvain_usecase.resolution_) + if (louvain_usecase.resolution_ != double{1}) return; + raft::handle_t handle{}; bool directed{false}; @@ -134,6 +141,9 @@ class Tests_Louvain EXPECT_THROW(louvain(graph_view, edge_weight_view, graph_view.local_vertex_partition_range_size(), + louvain_usecase.max_level_, + louvain_usecase.threshold_, + louvain_usecase.resolution_, louvain_usecase.check_correctness_, louvain_usecase.expected_level_, louvain_usecase.expected_modularity_), @@ -142,6 +152,9 @@ class Tests_Louvain louvain(graph_view, edge_weight_view, graph_view.local_vertex_partition_range_size(), + louvain_usecase.max_level_, + louvain_usecase.threshold_, + louvain_usecase.resolution_, louvain_usecase.check_correctness_, louvain_usecase.expected_level_, louvain_usecase.expected_modularity_); @@ -185,6 +198,9 @@ class Tests_Louvain cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, vertex_t num_vertices, + std::optional max_level, + std::optional threshold, + std::optional resolution, bool check_correctness, int expected_level, float expected_modularity) @@ -195,8 +211,29 @@ class Tests_Louvain size_t level; weight_t modularity; - std::tie(level, modularity) = cugraph::louvain( - handle, graph_view, edge_weight_view, clustering_v.data(), size_t{100}, weight_t{1}); + if (resolution) { + std::tie(level, modularity) = + cugraph::louvain(handle, + graph_view, + edge_weight_view, + clustering_v.data(), + max_level ? *max_level : size_t{100}, + threshold ? static_cast(*threshold) : weight_t{1e-7}, + static_cast(*resolution)); + } else if (threshold) { + std::tie(level, modularity) = cugraph::louvain(handle, + graph_view, + edge_weight_view, + clustering_v.data(), + max_level ? 
*max_level : size_t{100}, + static_cast(*threshold)); + } else if (max_level) { + std::tie(level, modularity) = + cugraph::louvain(handle, graph_view, edge_weight_view, clustering_v.data(), *max_level); + } else { + std::tie(level, modularity) = + cugraph::louvain(handle, graph_view, edge_weight_view, clustering_v.data()); + } RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -421,8 +458,11 @@ TEST_P(Tests_Louvain_Rmat64, CheckInt64Int64FloatFloat) INSTANTIATE_TEST_SUITE_P( simple_test, Tests_Louvain_File, - ::testing::Combine(::testing::Values(Louvain_Usecase{100, 1, true, 3, 0.408695}), - ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); + ::testing::Combine( + ::testing::Values(Louvain_Usecase{std::nullopt, std::nullopt, std::nullopt, true, 3, 0.408695}, + Louvain_Usecase{20, double{1e-4}, std::nullopt, true, 3, 0.408695}, + Louvain_Usecase{100, double{1e-4}, double{0.8}, true, 3, 0.48336622}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); INSTANTIATE_TEST_SUITE_P( file_benchmark_test, /* note that the test filename can be overridden in benchmarking (with diff --git a/cpp/tests/community/mg_louvain_test.cpp b/cpp/tests/community/mg_louvain_test.cpp index f89301c485b..41339e32d77 100644 --- a/cpp/tests/community/mg_louvain_test.cpp +++ b/cpp/tests/community/mg_louvain_test.cpp @@ -41,6 +41,7 @@ // struct Louvain_Usecase { size_t max_level_{100}; + double threshold_{1e-7}; double resolution_{1}; bool check_correctness_{true}; }; @@ -72,6 +73,7 @@ class Tests_MGLouvain cugraph::graph_view_t const& mg_graph_view, std::optional> mg_edge_weight_view, cugraph::Dendrogram const& mg_dendrogram, + weight_t threshold, weight_t resolution, weight_t mg_modularity) { @@ -100,6 +102,7 @@ class Tests_MGLouvain &sg_edge_weights, &sg_modularity, &handle, + threshold, resolution, comm_rank](size_t i) { rmm::device_uvector d_mg_aggregate_cluster_v(0, handle.get_stream()); @@ -128,6 
+131,7 @@ class Tests_MGLouvain sg_edge_weight_view, d_sg_cluster_v.data(), size_t{1}, + threshold, resolution); EXPECT_TRUE(cugraph::test::check_invertible( @@ -185,12 +189,13 @@ class Tests_MGLouvain hr_timer.start("MG Louvain"); } - auto [dendrogram, mg_modularity] = - cugraph::louvain(*handle_, - mg_graph_view, - mg_edge_weight_view, - louvain_usecase.max_level_, - louvain_usecase.resolution_); + auto [dendrogram, mg_modularity] = cugraph::louvain( + *handle_, + mg_graph_view, + mg_edge_weight_view, + louvain_usecase.max_level_, + static_cast(louvain_usecase.threshold_), + static_cast(louvain_usecase.resolution_)); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -206,6 +211,7 @@ class Tests_MGLouvain mg_graph_view, mg_edge_weight_view, *dendrogram, + louvain_usecase.threshold_, louvain_usecase.resolution_, mg_modularity); } @@ -257,15 +263,16 @@ INSTANTIATE_TEST_SUITE_P( Tests_MGLouvain_File, ::testing::Combine( // enable correctness checks for small graphs - ::testing::Values(Louvain_Usecase{100, 1}), + ::testing::Values(Louvain_Usecase{100, double{1e-7}, 1}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); -INSTANTIATE_TEST_SUITE_P(rmat_small_tests, - Tests_MGLouvain_Rmat, - ::testing::Combine(::testing::Values(Louvain_Usecase{100, 1}), - ::testing::Values(cugraph::test::Rmat_Usecase( - 10, 16, 0.57, 0.19, 0.19, 0, true, false)))); +INSTANTIATE_TEST_SUITE_P( + rmat_small_tests, + Tests_MGLouvain_Rmat, + ::testing::Combine( + ::testing::Values(Louvain_Usecase{100, double{1e-7}, 1}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); INSTANTIATE_TEST_SUITE_P( file_benchmark_test, /* note that the test filename can be overridden in benchmarking (with @@ -276,7 +283,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_MGLouvain_File, ::testing::Combine( // disable correctness checks 
for large graphs - ::testing::Values(Louvain_Usecase{100, 1, false}), + ::testing::Values(Louvain_Usecase{100, double{1e-7}, 1, false}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); INSTANTIATE_TEST_SUITE_P( @@ -288,7 +295,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_MGLouvain_Rmat, ::testing::Combine( // disable correctness checks for large graphs - ::testing::Values(Louvain_Usecase{100, 1, false}), + ::testing::Values(Louvain_Usecase{100, double{1e-7}, 1, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, true, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/dependencies.yaml b/dependencies.yaml index 23bd5c800b6..04ec1b6e957 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -14,14 +14,14 @@ files: - python_build_cythonize - python_run_cugraph - python_run_pylibcugraph - - python_run_cugraph_nx + - python_run_nx_cugraph - python_run_cugraph_dgl - python_run_cugraph_pyg - test_notebook - test_python_common - test_python_cugraph - test_python_pylibcugraph - - test_python_cugraph_nx + - test_python_nx_cugraph checks: output: none includes: @@ -105,29 +105,29 @@ files: includes: - test_python_common - test_python_pylibcugraph - py_build_cugraph_nx: + py_build_nx_cugraph: output: pyproject - pyproject_dir: python/cugraph-nx + pyproject_dir: python/nx-cugraph extras: table: build-system includes: - python_build_wheel - py_run_cugraph_nx: + py_run_nx_cugraph: output: pyproject - pyproject_dir: python/cugraph-nx + pyproject_dir: python/nx-cugraph extras: table: project includes: - - python_run_cugraph_nx - py_test_cugraph_nx: + - python_run_nx_cugraph + py_test_nx_cugraph: output: pyproject - pyproject_dir: python/cugraph-nx + pyproject_dir: python/nx-cugraph extras: table: project.optional-dependencies key: test includes: - test_python_common - - test_python_cugraph_nx + - test_python_nx_cugraph py_build_cugraph_dgl: output: pyproject pyproject_dir: python/cugraph-dgl @@ -199,12 +199,24 @@ files: 
output: conda conda_dir: python/cugraph-dgl/conda includes: + - checks - cugraph_dgl_dev + - test_python_common + cugraph_pyg_dev: + matrix: + cuda: ["11.8"] + output: conda + conda_dir: python/cugraph-pyg/conda + includes: + - checks + - cugraph_pyg_dev + - test_python_common channels: - rapidsai - rapidsai-nightly - dask/label/dev - pytorch + - pyg - dglteam/label/cu118 - conda-forge - nvidia @@ -375,7 +387,7 @@ dependencies: packages: - *pylibraft - *rmm - python_run_cugraph_nx: + python_run_nx_cugraph: common: - output_types: [conda, pyproject] packages: @@ -482,12 +494,12 @@ dependencies: packages: - *cudf - *numpy - test_python_cugraph_nx: + test_python_nx_cugraph: common: - output_types: [conda, pyproject] packages: - packaging>=21 - # not needed by cugraph-nx tests, but is required for running networkx tests + # not needed by nx-cugraph tests, but is required for running networkx tests - pytest-mpl cugraph_dgl_dev: common: @@ -498,6 +510,12 @@ dependencies: - pytorch>=2.0 - pytorch-cuda==11.8 - dgl>=1.1.0.cu* - - setuptools - - pre-commit - - pytest + cugraph_pyg_dev: + common: + - output_types: [conda] + packages: + - cugraph==23.10.* + - pylibcugraphops==23.10.* + - pytorch==2.0 + - pytorch-cuda==11.8 + - pyg=2.3.1=*torch_2.0.0*cu118* diff --git a/docs/cugraph/source/installation/source_build.md b/docs/cugraph/source/installation/source_build.md index 453149d6cea..7782591f1ce 100644 --- a/docs/cugraph/source/installation/source_build.md +++ b/docs/cugraph/source/installation/source_build.md @@ -84,7 +84,7 @@ build.sh [ ...] [ ...] libcugraph_etl - build libcugraph_etl.so and SG test binaries pylibcugraph - build the pylibcugraph Python package cugraph - build the cugraph Python package - cugraph-nx - build the cugraph-nx Python package + nx-cugraph - build the nx-cugraph Python package cugraph-service - build the cugraph-service_client and cugraph-service_server Python package cpp-mgtests - build libcugraph and libcugraph_etl MG tests. 
Builds MPI communicator, adding MPI as a dependency. cugraph-dgl - build the cugraph-dgl extensions for DGL diff --git a/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml b/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml index 2bb4b0f3cd3..138d384ebcf 100644 --- a/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml +++ b/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml @@ -5,16 +5,21 @@ channels: - rapidsai-nightly - dask/label/dev - pytorch +- pyg - dglteam/label/cu118 - conda-forge - nvidia dependencies: - cugraph==23.10.* - dgl>=1.1.0.cu* +- pandas - pre-commit - pylibcugraphops==23.10.* - pytest +- pytest-benchmark +- pytest-cov +- pytest-xdist - pytorch-cuda==11.8 - pytorch>=2.0 -- setuptools +- scipy name: cugraph_dgl_dev_cuda-118 diff --git a/python/cugraph-nx/cugraph_nx/tests/test_convert.py b/python/cugraph-nx/cugraph_nx/tests/test_convert.py deleted file mode 100644 index 7efba9ea555..00000000000 --- a/python/cugraph-nx/cugraph_nx/tests/test_convert.py +++ /dev/null @@ -1,203 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import cupy as cp -import networkx as nx -import pytest - -import cugraph_nx as cnx -from cugraph_nx import interface - - -@pytest.mark.parametrize("graph_class", [nx.Graph, nx.DiGraph]) -@pytest.mark.parametrize( - "kwargs", - [ - {}, - {"preserve_edge_attrs": True}, - {"preserve_node_attrs": True}, - {"preserve_all_attrs": True}, - {"edge_attrs": {"x": 0}}, - {"edge_attrs": {"x": None}}, - {"edge_attrs": {"x": cnx.convert.REQUIRED}}, - {"edge_attrs": {"x": ...}}, # sugar for REQUIRED - {"edge_attrs": "x"}, - {"node_attrs": {"x": 0}}, - {"node_attrs": {"x": None}}, - {"node_attrs": {"x": cnx.convert.REQUIRED}}, - {"node_attrs": {"x": ...}}, # sugar for REQUIRED - {"node_attrs": "x"}, - ], -) -def test_convert_empty(graph_class, kwargs): - G = graph_class() - cG = cnx.from_networkx(G, **kwargs) - H = cnx.to_networkx(cG) - assert G.number_of_nodes() == cG.number_of_nodes() == H.number_of_nodes() == 0 - assert G.number_of_edges() == cG.number_of_edges() == H.number_of_edges() == 0 - assert cG.edge_values == cG.edge_masks == cG.node_values == cG.node_masks == {} - assert G.graph == cG.graph == H.graph == {} - - -def test_convert(): - # FIXME: can we break this into smaller tests? 
- G = nx.Graph() - G.add_edge(0, 1, x=2) - G.add_node(0, foo=10) - G.add_node(1, foo=20, bar=100) - for kwargs in [ - {"preserve_edge_attrs": True}, - {"preserve_all_attrs": True}, - {"edge_attrs": {"x": 0}}, - {"edge_attrs": {"x": None}, "node_attrs": {"bar": None}}, - {"edge_attrs": "x", "edge_dtypes": int}, - { - "edge_attrs": {"x": cnx.convert.REQUIRED}, - "node_attrs": {"foo": cnx.convert.REQUIRED}, - }, - {"edge_attrs": {"x": ...}, "node_attrs": {"foo": ...}}, # sugar for REQUIRED - ]: - # All edges have "x" attribute, so all kwargs are equivalent - cG = cnx.from_networkx(G, **kwargs) - cp.testing.assert_array_equal(cG.row_indices, [0, 1]) - cp.testing.assert_array_equal(cG.col_indices, [1, 0]) - cp.testing.assert_array_equal(cG.edge_values["x"], [2, 2]) - assert len(cG.edge_values) == 1 - assert cG.edge_masks == {} - H = cnx.to_networkx(cG) - assert G.number_of_nodes() == cG.number_of_nodes() == H.number_of_nodes() == 2 - assert G.number_of_edges() == cG.number_of_edges() == H.number_of_edges() == 1 - assert G.adj == H.adj - - with pytest.raises(KeyError, match="bar"): - cnx.from_networkx(G, node_attrs={"bar": ...}) - - # Structure-only graph (no edge attributes) - cG = cnx.from_networkx(G, preserve_node_attrs=True) - cp.testing.assert_array_equal(cG.row_indices, [0, 1]) - cp.testing.assert_array_equal(cG.col_indices, [1, 0]) - cp.testing.assert_array_equal(cG.node_values["foo"], [10, 20]) - assert cG.edge_values == cG.edge_masks == {} - H = cnx.to_networkx(cG) - assert set(G.edges) == set(H.edges) == {(0, 1)} - assert G.nodes == H.nodes - - # Fill completely missing attribute with default value - cG = cnx.from_networkx(G, edge_attrs={"y": 0}) - cp.testing.assert_array_equal(cG.row_indices, [0, 1]) - cp.testing.assert_array_equal(cG.col_indices, [1, 0]) - cp.testing.assert_array_equal(cG.edge_values["y"], [0, 0]) - assert len(cG.edge_values) == 1 - assert cG.edge_masks == cG.node_values == cG.node_masks == {} - H = cnx.to_networkx(cG) - assert 
list(H.edges(data=True)) == [(0, 1, {"y": 0})] - - # If attribute is completely missing (and no default), then just ignore it - cG = cnx.from_networkx(G, edge_attrs={"y": None}) - cp.testing.assert_array_equal(cG.row_indices, [0, 1]) - cp.testing.assert_array_equal(cG.col_indices, [1, 0]) - assert sorted(cG.edge_values) == sorted(cG.edge_masks) == [] - H = cnx.to_networkx(cG) - assert list(H.edges(data=True)) == [(0, 1, {})] - - G.add_edge(0, 2) - # Some edges are missing 'x' attribute; need to use a mask - for kwargs in [{"preserve_edge_attrs": True}, {"edge_attrs": {"x": None}}]: - cG = cnx.from_networkx(G, **kwargs) - cp.testing.assert_array_equal(cG.row_indices, [0, 0, 1, 2]) - cp.testing.assert_array_equal(cG.col_indices, [1, 2, 0, 0]) - assert sorted(cG.edge_values) == sorted(cG.edge_masks) == ["x"] - cp.testing.assert_array_equal(cG.edge_masks["x"], [True, False, True, False]) - cp.testing.assert_array_equal(cG.edge_values["x"][cG.edge_masks["x"]], [2, 2]) - H = cnx.to_networkx(cG) - assert list(H.edges(data=True)) == [(0, 1, {"x": 2}), (0, 2, {})] - - with pytest.raises(KeyError, match="x"): - cnx.from_networkx(G, edge_attrs={"x": cnx.convert.REQUIRED}) - with pytest.raises(KeyError, match="x"): - cnx.from_networkx(G, edge_attrs={"x": ...}) - with pytest.raises(KeyError, match="bar"): - cnx.from_networkx(G, node_attrs={"bar": cnx.convert.REQUIRED}) - with pytest.raises(KeyError, match="bar"): - cnx.from_networkx(G, node_attrs={"bar": ...}) - - # Now for something more complicated... 
- G = nx.Graph() - G.add_edge(10, 20, x=1) - G.add_edge(10, 30, x=2, y=1.5) - G.add_node(10, foo=100) - G.add_node(20, foo=200, bar=1000) - G.add_node(30, foo=300) - # Some edges have masks, some don't - for kwargs in [ - {"preserve_edge_attrs": True}, - {"preserve_all_attrs": True}, - {"edge_attrs": {"x": None, "y": None}}, - {"edge_attrs": {"x": 0, "y": None}}, - {"edge_attrs": {"x": 0, "y": None}}, - {"edge_attrs": {"x": 0, "y": None}, "edge_dtypes": {"x": int, "y": float}}, - ]: - cG = cnx.from_networkx(G, **kwargs) - assert cG.id_to_key == {0: 10, 1: 20, 2: 30} # Remap node IDs to 0, 1, ... - cp.testing.assert_array_equal(cG.row_indices, [0, 0, 1, 2]) - cp.testing.assert_array_equal(cG.col_indices, [1, 2, 0, 0]) - cp.testing.assert_array_equal(cG.edge_values["x"], [1, 2, 1, 2]) - assert sorted(cG.edge_masks) == ["y"] - cp.testing.assert_array_equal(cG.edge_masks["y"], [False, True, False, True]) - cp.testing.assert_array_equal( - cG.edge_values["y"][cG.edge_masks["y"]], [1.5, 1.5] - ) - H = cnx.to_networkx(cG) - assert G.adj == H.adj - - # Some nodes have masks, some don't - for kwargs in [ - {"preserve_node_attrs": True}, - {"preserve_all_attrs": True}, - {"node_attrs": {"foo": None, "bar": None}}, - {"node_attrs": {"foo": None, "bar": None}}, - {"node_attrs": {"foo": 0, "bar": None, "missing": None}}, - ]: - cG = cnx.from_networkx(G, **kwargs) - assert cG.id_to_key == {0: 10, 1: 20, 2: 30} # Remap node IDs to 0, 1, ... 
- cp.testing.assert_array_equal(cG.row_indices, [0, 0, 1, 2]) - cp.testing.assert_array_equal(cG.col_indices, [1, 2, 0, 0]) - cp.testing.assert_array_equal(cG.node_values["foo"], [100, 200, 300]) - assert sorted(cG.node_masks) == ["bar"] - cp.testing.assert_array_equal(cG.node_masks["bar"], [False, True, False]) - cp.testing.assert_array_equal( - cG.node_values["bar"][cG.node_masks["bar"]], [1000] - ) - H = cnx.to_networkx(cG) - assert G.nodes == H.nodes - - # Check default values for nodes - for kwargs in [ - {"node_attrs": {"foo": None, "bar": 0}}, - {"node_attrs": {"foo": None, "bar": 0, "missing": None}}, - {"node_attrs": {"bar": 0}}, - {"node_attrs": {"bar": 0}, "node_dtypes": {"bar": int}}, - {"node_attrs": {"bar": 0, "foo": None}, "node_dtypes": int}, - ]: - cG = cnx.from_networkx(G, **kwargs) - assert cG.id_to_key == {0: 10, 1: 20, 2: 30} # Remap node IDs to 0, 1, ... - cp.testing.assert_array_equal(cG.row_indices, [0, 0, 1, 2]) - cp.testing.assert_array_equal(cG.col_indices, [1, 2, 0, 0]) - cp.testing.assert_array_equal(cG.node_values["bar"], [0, 1000, 0]) - assert cG.node_masks == {} - - with pytest.raises( - TypeError, match="edge_attrs and weight arguments should not both be given" - ): - interface.BackendInterface.convert_from_nx(G, edge_attrs={"x": 1}, weight="x") - with pytest.raises(TypeError, match="Expected networkx.Graph"): - cnx.from_networkx({}) diff --git a/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml b/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml new file mode 100644 index 00000000000..4e5159e6b45 --- /dev/null +++ b/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml @@ -0,0 +1,25 @@ +# This file is generated by `rapids-dependency-file-generator`. +# To make changes, edit ../../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+channels: +- rapidsai +- rapidsai-nightly +- dask/label/dev +- pytorch +- pyg +- dglteam/label/cu118 +- conda-forge +- nvidia +dependencies: +- cugraph==23.10.* +- pandas +- pre-commit +- pyg=2.3.1=*torch_2.0.0*cu118* +- pylibcugraphops==23.10.* +- pytest +- pytest-benchmark +- pytest-cov +- pytest-xdist +- pytorch-cuda==11.8 +- pytorch==2.0 +- scipy +name: cugraph_pyg_dev_cuda-118 diff --git a/python/cugraph-pyg/cugraph_pyg/loader/cugraph_node_loader.py b/python/cugraph-pyg/cugraph_pyg/loader/cugraph_node_loader.py index fcec341d1db..8d79685965f 100644 --- a/python/cugraph-pyg/cugraph_pyg/loader/cugraph_node_loader.py +++ b/python/cugraph-pyg/cugraph_pyg/loader/cugraph_node_loader.py @@ -351,6 +351,10 @@ def __init__( self.__input_nodes = input_nodes self.inner_loader_args = kwargs + @property + def batch_size(self) -> int: + return self.__batch_size + def __iter__(self): self.current_loader = EXPERIMENTAL__BulkSampleLoader( self.__feature_store, diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py index 0c94be5e12b..9c9dcdb43bb 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py @@ -13,10 +13,14 @@ from .gat_conv import GATConv from .gatv2_conv import GATv2Conv +from .rgcn_conv import RGCNConv +from .sage_conv import SAGEConv from .transformer_conv import TransformerConv __all__ = [ "GATConv", "GATv2Conv", + "RGCNConv", + "SAGEConv", "TransformerConv", ] diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py index 2639f66f440..10431a0398d 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/base.py @@ -15,10 +15,10 @@ from typing import Optional, Tuple, Union from cugraph.utilities.utils import import_optional +from pylibcugraphops.pytorch import CSC, HeteroCSC torch = import_optional("torch") torch_geometric = 
import_optional("torch_geometric") -ops_torch = import_optional("pylibcugraphops.pytorch") class BaseConv(torch.nn.Module): # pragma: no cover @@ -74,7 +74,7 @@ def get_cugraph( csc: Tuple[torch.Tensor, torch.Tensor, int], bipartite: bool = False, max_num_neighbors: Optional[int] = None, - ) -> ops_torch.CSC: + ) -> CSC: r"""Constructs a :obj:`cugraph-ops` graph object from CSC representation. Supports both bipartite and non-bipartite graphs. @@ -87,22 +87,22 @@ def get_cugraph( bipartite (bool): If set to :obj:`True`, will create the bipartite structure in cugraph-ops. (default: :obj:`False`) max_num_neighbors (int, optional): The maximum number of neighbors - of a target node. It is only effective when operating in a - bipartite graph. When not given, will be computed on-the-fly, - leading to slightly worse performance. (default: :obj:`None`) + of a destination node. When enabled, it allows models to use + the message-flow-graph primitives in cugraph-ops. + (default: :obj:`None`) """ row, colptr, num_src_nodes = csc if not row.is_cuda: raise RuntimeError( - f"'{self.__class__.__name__}' requires GPU-" - f"based processing (got CPU tensor)" + f"'{self.__class__.__name__}' requires GPU-based processing " + f"but got CPU tensor." ) if max_num_neighbors is None: max_num_neighbors = -1 - return ops_torch.CSC( + return CSC( offsets=colptr, indices=row, num_src_nodes=num_src_nodes, @@ -117,7 +117,7 @@ def get_typed_cugraph( num_edge_types: Optional[int] = None, bipartite: bool = False, max_num_neighbors: Optional[int] = None, - ) -> ops_torch.HeteroCSC: + ) -> HeteroCSC: r"""Constructs a typed :obj:`cugraph` graph object from a CSC representation where each edge corresponds to a given edge type. Supports both bipartite and non-bipartite graphs. @@ -135,9 +135,9 @@ def get_typed_cugraph( bipartite (bool): If set to :obj:`True`, will create the bipartite structure in cugraph-ops. 
(default: :obj:`False`) max_num_neighbors (int, optional): The maximum number of neighbors - of a target node. It is only effective when operating in a - bipartite graph. When not given, will be computed on-the-fly, - leading to slightly worse performance. (default: :obj:`None`) + of a destination node. When enabled, it allows models to use + the message-flow-graph primitives in cugraph-ops. + (default: :obj:`None`) """ if num_edge_types is None: num_edge_types = int(edge_type.max()) + 1 @@ -148,7 +148,7 @@ def get_typed_cugraph( row, colptr, num_src_nodes = csc edge_type = edge_type.int() - return ops_torch.HeteroCSC( + return HeteroCSC( offsets=colptr, indices=row, edge_types=edge_type, diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py index f0040015b4a..309bee4e228 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/gat_conv.py @@ -10,16 +10,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from typing import Optional, Tuple, Union from cugraph.utilities.utils import import_optional +from pylibcugraphops.pytorch.operators import mha_gat_n2n from .base import BaseConv torch = import_optional("torch") nn = import_optional("torch.nn") torch_geometric = import_optional("torch_geometric") -ops_torch = import_optional("pylibcugraphops.pytorch") class GATConv(BaseConv): @@ -174,9 +175,9 @@ def forward( representation to the desired format. edge_attr: (torch.Tensor, optional) The edge features. max_num_neighbors (int, optional): The maximum number of neighbors - of a target node. It is only effective when operating in a - bipartite graph. When not given, will be computed on-the-fly, - leading to slightly worse performance. (default: :obj:`None`) + of a destination node. 
When enabled, it allows models to use + the message-flow-graph primitives in cugraph-ops. + (default: :obj:`None`) """ bipartite = not isinstance(x, torch.Tensor) graph = self.get_cugraph( @@ -210,7 +211,7 @@ def forward( ) x = self.lin(x) - out = ops_torch.operators.mha_gat_n2n( + out = mha_gat_n2n( (x_src, x_dst) if bipartite else x, self.att, graph, diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py index d74ca6b00d0..32956dcb400 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/gatv2_conv.py @@ -10,16 +10,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from typing import Optional, Tuple, Union from cugraph.utilities.utils import import_optional +from pylibcugraphops.pytorch.operators import mha_gat_v2_n2n from .base import BaseConv torch = import_optional("torch") nn = import_optional("torch.nn") torch_geometric = import_optional("torch_geometric") -ops_torch = import_optional("pylibcugraphops.pytorch") class GATv2Conv(BaseConv): @@ -207,7 +208,7 @@ def forward( else: x = self.lin_src(x) - out = ops_torch.operators.mha_gat_v2_n2n( + out = mha_gat_v2_n2n( (x_src, x_dst) if bipartite else x, self.att, graph, diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/rgcn_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/rgcn_conv.py new file mode 100644 index 00000000000..683780b66eb --- /dev/null +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/rgcn_conv.py @@ -0,0 +1,141 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional, Tuple + +from cugraph.utilities.utils import import_optional +from pylibcugraphops.pytorch.operators import agg_hg_basis_n2n_post + +from .base import BaseConv + +torch = import_optional("torch") +torch_geometric = import_optional("torch_geometric") + + +class RGCNConv(BaseConv): # pragma: no cover + r"""The relational graph convolutional operator from the `"Modeling + Relational Data with Graph Convolutional Networks" + `_ paper. + + .. math:: + \mathbf{x}^{\prime}_i = \mathbf{\Theta}_{\textrm{root}} \cdot + \mathbf{x}_i + \sum_{r \in \mathcal{R}} \sum_{j \in \mathcal{N}_r(i)} + \frac{1}{|\mathcal{N}_r(i)|} \mathbf{\Theta}_r \cdot \mathbf{x}_j, + + where :math:`\mathcal{R}` denotes the set of relations, *i.e.* edge types. + Edge type needs to be a one-dimensional :obj:`torch.long` tensor which + stores a relation identifier + :math:`\in \{ 0, \ldots, |\mathcal{R}| - 1\}` for each edge. + + Args: + in_channels (int): Size of each input sample. + out_channels (int): Size of each output sample. + num_relations (int): Number of relations. + num_bases (int, optional): If set, this layer will use the + basis-decomposition regularization scheme where :obj:`num_bases` + denotes the number of bases to use. (default: :obj:`None`) + aggr (str, optional): The aggregation scheme to use + (:obj:`"add"`, :obj:`"mean"`, :obj:`"sum"`). + (default: :obj:`"mean"`) + root_weight (bool, optional): If set to :obj:`False`, the layer will + not add transformed root node features to the output. 
+ (default: :obj:`True`) + bias (bool, optional): If set to :obj:`False`, the layer will not learn + an additive bias. (default: :obj:`True`) + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + num_relations: int, + num_bases: Optional[int] = None, + aggr: str = "mean", + root_weight: bool = True, + bias: bool = True, + ): + super().__init__() + + if aggr not in ["mean", "sum", "add"]: + raise ValueError( + f"Aggregation function must be chosen from 'mean', 'sum' or " + f"'add', but got '{aggr}'." + ) + + self.in_channels = in_channels + self.out_channels = out_channels + self.num_relations = num_relations + self.num_bases = num_bases + self.aggr = aggr + self.root_weight = root_weight + + dim_root_weight = 1 if root_weight else 0 + + if num_bases is not None: + self.weight = torch.nn.Parameter( + torch.empty(num_bases + dim_root_weight, in_channels, out_channels) + ) + self.comp = torch.nn.Parameter(torch.empty(num_relations, num_bases)) + else: + self.weight = torch.nn.Parameter( + torch.empty(num_relations + dim_root_weight, in_channels, out_channels) + ) + self.register_parameter("comp", None) + + if bias: + self.bias = torch.nn.Parameter(torch.empty(out_channels)) + else: + self.register_parameter("bias", None) + + self.reset_parameters() + + def reset_parameters(self): + end = -1 if self.root_weight else None + torch_geometric.nn.inits.glorot(self.weight[:end]) + torch_geometric.nn.inits.glorot(self.comp) + if self.root_weight: + torch_geometric.nn.inits.glorot(self.weight[-1]) + torch_geometric.nn.inits.zeros(self.bias) + + def forward( + self, + x: torch.Tensor, + csc: Tuple[torch.Tensor, torch.Tensor, int], + edge_type: torch.Tensor, + max_num_neighbors: Optional[int] = None, + ) -> torch.Tensor: + + graph = self.get_typed_cugraph( + csc, edge_type, self.num_relations, max_num_neighbors=max_num_neighbors + ) + + out = agg_hg_basis_n2n_post( + x, + self.comp, + graph, + concat_own=self.root_weight, + norm_by_out_degree=bool(self.aggr 
== "mean"), + ) + + out = out @ self.weight.view(-1, self.out_channels) + + if self.bias is not None: + out = out + self.bias + + return out + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}({self.in_channels}, " + f"{self.out_channels}, num_relations={self.num_relations})" + ) diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/sage_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/sage_conv.py new file mode 100644 index 00000000000..8e0c1027416 --- /dev/null +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/sage_conv.py @@ -0,0 +1,149 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional, Tuple, Union + +from cugraph.utilities.utils import import_optional +from pylibcugraphops.pytorch.operators import agg_concat_n2n + +from .base import BaseConv + +torch = import_optional("torch") +torch_geometric = import_optional("torch_geometric") + + +class SAGEConv(BaseConv): + r"""The GraphSAGE operator from the `"Inductive Representation Learning on + Large Graphs" `_ paper. + + .. math:: + \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W}_2 \cdot + \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j + + If :obj:`project = True`, then :math:`\mathbf{x}_j` will first get + projected via + + .. math:: + \mathbf{x}_j \leftarrow \sigma ( \mathbf{W}_3 \mathbf{x}_j + + \mathbf{b}) + + as described in Eq. (3) of the paper. + + Args: + in_channels (int or tuple): Size of each input sample. 
A tuple + corresponds to the sizes of source and target dimensionalities. + out_channels (int): Size of each output sample. + aggr (str or Aggregation, optional): The aggregation scheme to use. + Choose from :obj:`"mean"`, :obj:`"sum"`, :obj:`"min"` or + :obj:`"max"`. (default: :obj:`"mean"`) + normalize (bool, optional): If set to :obj:`True`, output features + will be :math:`\ell_2`-normalized, *i.e.*, + :math:`\frac{\mathbf{h}_i^{k+1}} + {\| \mathbf{h}_i^{k+1} \|_2}`. + (default: :obj:`False`) + root_weight (bool, optional): If set to :obj:`False`, the layer will + not add transformed root node features to the output. + (default: :obj:`True`) + project (bool, optional): If set to :obj:`True`, the layer will apply a + linear transformation followed by an activation function before + aggregation (as described in Eq. (3) of the paper). + (default: :obj:`False`) + bias (bool, optional): If set to :obj:`False`, the layer will not learn + an additive bias. (default: :obj:`True`) + """ + + def __init__( + self, + in_channels: Union[int, Tuple[int, int]], + out_channels: int, + aggr: str = "mean", + normalize: bool = False, + root_weight: bool = True, + project: bool = False, + bias: bool = True, + ): + super().__init__() + + if aggr not in ["mean", "sum", "min", "max"]: + raise ValueError( + f"Aggregation function must be chosen from 'mean'," + f" 'sum', 'min' or 'max', but got '{aggr}'." 
+ ) + + self.in_channels = in_channels + self.out_channels = out_channels + self.aggr = aggr + self.normalize = normalize + self.root_weight = root_weight + self.project = project + + if isinstance(in_channels, int): + self.in_channels_src = self.in_channels_dst = in_channels + else: + self.in_channels_src, self.in_channels_dst = in_channels + + if self.project: + self.pre_lin = torch_geometric.nn.Linear( + self.in_channels_src, self.in_channels_src, bias=True + ) + + if self.root_weight: + self.lin = torch_geometric.nn.Linear( + self.in_channels_src + self.in_channels_dst, out_channels, bias=bias + ) + else: + self.lin = torch_geometric.nn.Linear( + self.in_channels_src, out_channels, bias=bias + ) + + self.reset_parameters() + + def reset_parameters(self): + if self.project: + self.pre_lin.reset_parameters() + self.lin.reset_parameters() + + def forward( + self, + x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], + csc: Tuple[torch.Tensor, torch.Tensor, int], + max_num_neighbors: Optional[int] = None, + ) -> torch.Tensor: + bipartite = isinstance(x, Tuple) + graph = self.get_cugraph( + csc, bipartite=bipartite, max_num_neighbors=max_num_neighbors + ) + + if self.project: + if bipartite: + x = (self.pre_lin(x[0]).relu(), x[1]) + else: + x = self.pre_lin(x).relu() + + out = agg_concat_n2n(x, graph, self.aggr) + + if self.root_weight: + out = self.lin(out) + else: + out = self.lin(out[:, : self.in_channels_src]) + + if self.normalize: + out = torch.nn.functional.normalize(out, p=2.0, dim=-1) + + return out + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}({self.in_channels}, " + f"{self.out_channels}, aggr={self.aggr})" + ) diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py index 1b8b1aa0ffa..41c0b4b4090 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/transformer_conv.py @@ -10,16 +10,17 
@@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from typing import Optional, Tuple, Union from cugraph.utilities.utils import import_optional +from pylibcugraphops.pytorch.operators import mha_simple_n2n from .base import BaseConv torch = import_optional("torch") nn = import_optional("torch.nn") torch_geometric = import_optional("torch_geometric") -ops_torch = import_optional("pylibcugraphops.pytorch") class TransformerConv(BaseConv): @@ -185,7 +186,7 @@ def forward( ) edge_attr = self.lin_edge(edge_attr) - out = ops_torch.operators.mha_simple_n2n( + out = mha_simple_n2n( key, query, value, diff --git a/python/cugraph-pyg/cugraph_pyg/tests/conftest.py b/python/cugraph-pyg/cugraph_pyg/tests/conftest.py index 3270dd0bf93..083c4a2b37b 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/conftest.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/conftest.py @@ -265,3 +265,22 @@ def abc_graph(): ) return F, G, N + + +@pytest.fixture +def basic_pyg_graph_1(): + edge_index = torch.tensor([[0, 1, 2, 3], [0, 0, 1, 1]]) + size = (4, 4) + return edge_index, size + + +@pytest.fixture +def basic_pyg_graph_2(): + edge_index = torch.tensor( + [ + [0, 1, 0, 2, 3, 0, 4, 0, 5, 0, 6, 7, 0, 8, 9], + [1, 9, 2, 9, 9, 4, 9, 5, 9, 6, 9, 9, 8, 9, 0], + ] + ) + size = (10, 10) + return edge_index, size diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py index ae5fd73c438..21c43bad38c 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py @@ -13,15 +13,9 @@ import pytest -try: - from torch_geometric.nn import GATConv -except ModuleNotFoundError: - pytest.skip("PyG not available", allow_module_level=True) - -from cugraph.utilities.utils import import_optional from cugraph_pyg.nn import GATConv as CuGraphGATConv 
-torch = import_optional("torch") +ATOL = 1e-6 @pytest.mark.parametrize("bias", [True, False]) @@ -30,17 +24,16 @@ @pytest.mark.parametrize("heads", [1, 2, 3, 5, 10, 16]) @pytest.mark.parametrize("max_num_neighbors", [8, None]) @pytest.mark.parametrize("use_edge_attr", [True, False]) +@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) def test_gat_conv_equality( - bias, bipartite, concat, heads, max_num_neighbors, use_edge_attr + bias, bipartite, concat, heads, max_num_neighbors, use_edge_attr, graph, request ): - atol = 1e-6 - edge_index = torch.tensor( - [ - [7, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 8, 9], - [0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7], - ], - ).cuda() - size = (10, 10) + pytest.importorskip("torch_geometric", reason="PyG not available") + import torch + from torch_geometric.nn import GATConv + + edge_index, size = request.getfixturevalue(graph) + edge_index = edge_index.cuda() if bipartite: in_channels = (5, 3) @@ -87,7 +80,7 @@ def test_gat_conv_equality( out1 = conv1(x, edge_index, edge_attr=edge_attr) out2 = conv2(x, csc, edge_attr=edge_attr_perm, max_num_neighbors=max_num_neighbors) - assert torch.allclose(out1, out2, atol=atol) + assert torch.allclose(out1, out2, atol=ATOL) grad_output = torch.rand_like(out1) out1.backward(grad_output) @@ -95,30 +88,30 @@ def test_gat_conv_equality( if bipartite: assert torch.allclose( - conv1.lin_src.weight.grad, conv2.lin_src.weight.grad, atol=atol + conv1.lin_src.weight.grad, conv2.lin_src.weight.grad, atol=ATOL ) assert torch.allclose( - conv1.lin_dst.weight.grad, conv2.lin_dst.weight.grad, atol=atol + conv1.lin_dst.weight.grad, conv2.lin_dst.weight.grad, atol=ATOL ) else: assert torch.allclose( - conv1.lin_src.weight.grad, conv2.lin.weight.grad, atol=atol + conv1.lin_src.weight.grad, conv2.lin.weight.grad, atol=ATOL ) assert torch.allclose( - conv1.att_src.grad.flatten(), conv2.att.grad[:out_dim], atol=atol + conv1.att_src.grad.flatten(), conv2.att.grad[:out_dim], atol=ATOL ) 
assert torch.allclose( - conv1.att_dst.grad.flatten(), conv2.att.grad[out_dim : 2 * out_dim], atol=atol + conv1.att_dst.grad.flatten(), conv2.att.grad[out_dim : 2 * out_dim], atol=ATOL ) if use_edge_attr: assert torch.allclose( - conv1.att_edge.grad.flatten(), conv2.att.grad[2 * out_dim :], atol=atol + conv1.att_edge.grad.flatten(), conv2.att.grad[2 * out_dim :], atol=ATOL ) assert torch.allclose( - conv1.lin_edge.weight.grad, conv2.lin_edge.weight.grad, atol=atol + conv1.lin_edge.weight.grad, conv2.lin_edge.weight.grad, atol=ATOL ) if bias: - assert torch.allclose(conv1.bias.grad, conv2.bias.grad, atol=atol) + assert torch.allclose(conv1.bias.grad, conv2.bias.grad, atol=ATOL) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py index 1c4f241304e..6b11e87154a 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py @@ -13,30 +13,23 @@ import pytest -try: - from torch_geometric.nn import GATv2Conv -except ModuleNotFoundError: - pytest.skip("PyG not available", allow_module_level=True) - -from cugraph.utilities.utils import import_optional from cugraph_pyg.nn import GATv2Conv as CuGraphGATv2Conv -torch = import_optional("torch") +ATOL = 1e-6 @pytest.mark.parametrize("bipartite", [True, False]) @pytest.mark.parametrize("concat", [True, False]) @pytest.mark.parametrize("heads", [1, 2, 3, 5, 10, 16]) @pytest.mark.parametrize("use_edge_attr", [True, False]) -def test_gatv2_conv_equality(bipartite, concat, heads, use_edge_attr): - atol = 1e-6 - edge_index = torch.tensor( - [ - [7, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 8, 9], - [0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7], - ], - ).cuda() - size = (10, 10) +@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) +def test_gatv2_conv_equality(bipartite, concat, heads, use_edge_attr, graph, request): + pytest.importorskip("torch_geometric", 
reason="PyG not available") + import torch + from torch_geometric.nn import GATv2Conv + + edge_index, size = request.getfixturevalue(graph) + edge_index = edge_index.cuda() if bipartite: in_channels = (5, 3) @@ -70,26 +63,24 @@ def test_gatv2_conv_equality(bipartite, concat, heads, use_edge_attr): with torch.no_grad(): conv2.lin_src.weight.data = conv1.lin_l.weight.data.detach().clone() conv2.lin_dst.weight.data = conv1.lin_r.weight.data.detach().clone() - conv2.att.data = conv1.att.data.flatten().detach().clone() - if use_edge_attr: conv2.lin_edge.weight.data = conv1.lin_edge.weight.data.detach().clone() out1 = conv1(x, edge_index, edge_attr=edge_attr) out2 = conv2(x, csc, edge_attr=edge_attr_perm) - assert torch.allclose(out1, out2, atol=atol) + assert torch.allclose(out1, out2, atol=ATOL) grad_output = torch.rand_like(out1) out1.backward(grad_output) out2.backward(grad_output) - assert torch.allclose(conv1.lin_l.weight.grad, conv2.lin_src.weight.grad, atol=atol) - assert torch.allclose(conv1.lin_r.weight.grad, conv2.lin_dst.weight.grad, atol=atol) + assert torch.allclose(conv1.lin_l.weight.grad, conv2.lin_src.weight.grad, atol=ATOL) + assert torch.allclose(conv1.lin_r.weight.grad, conv2.lin_dst.weight.grad, atol=ATOL) - assert torch.allclose(conv1.att.grad.flatten(), conv2.att.grad, atol=atol) + assert torch.allclose(conv1.att.grad.flatten(), conv2.att.grad, atol=ATOL) if use_edge_attr: assert torch.allclose( - conv1.lin_edge.weight.grad, conv2.lin_edge.weight.grad, atol=atol + conv1.lin_edge.weight.grad, conv2.lin_edge.weight.grad, atol=ATOL ) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_rgcn_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_rgcn_conv.py new file mode 100644 index 00000000000..233c6aa2836 --- /dev/null +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_rgcn_conv.py @@ -0,0 +1,71 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from cugraph_pyg.nn import RGCNConv as CuGraphRGCNConv + +ATOL = 1e-6 + + +@pytest.mark.parametrize("aggr", ["add", "sum", "mean"]) +@pytest.mark.parametrize("bias", [True, False]) +@pytest.mark.parametrize("max_num_neighbors", [8, None]) +@pytest.mark.parametrize("num_bases", [1, 2, None]) +@pytest.mark.parametrize("root_weight", [True, False]) +@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) +def test_rgcn_conv_equality( + aggr, bias, max_num_neighbors, num_bases, root_weight, graph, request +): + pytest.importorskip("torch_geometric", reason="PyG not available") + import torch + from torch_geometric.nn import FastRGCNConv as RGCNConv + + in_channels, out_channels, num_relations = (4, 2, 3) + kwargs = dict(aggr=aggr, bias=bias, num_bases=num_bases, root_weight=root_weight) + + edge_index, size = request.getfixturevalue(graph) + edge_index = edge_index.cuda() + edge_type = torch.randint(num_relations, (edge_index.size(1),)).cuda() + + x = torch.rand(size[0], in_channels, device="cuda") + csc, edge_type_perm = CuGraphRGCNConv.to_csc(edge_index, size, edge_type) + + conv1 = RGCNConv(in_channels, out_channels, num_relations, **kwargs).cuda() + conv2 = CuGraphRGCNConv(in_channels, out_channels, num_relations, **kwargs).cuda() + + with torch.no_grad(): + if root_weight: + conv2.weight.data[:-1] = conv1.weight.data + conv2.weight.data[-1] = conv1.root.data + else: + 
conv2.weight.data = conv1.weight.data.detach().clone() + if num_bases is not None: + conv2.comp.data = conv1.comp.data.detach().clone() + + out1 = conv1(x, edge_index, edge_type) + out2 = conv2(x, csc, edge_type_perm, max_num_neighbors=max_num_neighbors) + assert torch.allclose(out1, out2, atol=ATOL) + + grad_out = torch.rand_like(out1) + out1.backward(grad_out) + out2.backward(grad_out) + + if root_weight: + assert torch.allclose(conv1.weight.grad, conv2.weight.grad[:-1], atol=ATOL) + assert torch.allclose(conv1.root.grad, conv2.weight.grad[-1], atol=ATOL) + else: + assert torch.allclose(conv1.weight.grad, conv2.weight.grad, atol=ATOL) + + if num_bases is not None: + assert torch.allclose(conv1.comp.grad, conv2.comp.grad, atol=ATOL) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_sage_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_sage_conv.py new file mode 100644 index 00000000000..7f73cddbdbb --- /dev/null +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_sage_conv.py @@ -0,0 +1,89 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +from cugraph_pyg.nn import SAGEConv as CuGraphSAGEConv + +ATOL = 1e-6 + + +@pytest.mark.parametrize("aggr", ["sum", "mean", "min", "max"]) +@pytest.mark.parametrize("bias", [True, False]) +@pytest.mark.parametrize("bipartite", [True, False]) +@pytest.mark.parametrize("max_num_neighbors", [8, None]) +@pytest.mark.parametrize("normalize", [True, False]) +@pytest.mark.parametrize("root_weight", [True, False]) +@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) +def test_sage_conv_equality( + aggr, bias, bipartite, max_num_neighbors, normalize, root_weight, graph, request +): + pytest.importorskip("torch_geometric", reason="PyG not available") + import torch + from torch_geometric.nn import SAGEConv + + edge_index, size = request.getfixturevalue(graph) + edge_index = edge_index.cuda() + csc = CuGraphSAGEConv.to_csc(edge_index, size) + + if bipartite: + in_channels = (7, 3) + x = ( + torch.rand(size[0], in_channels[0]).cuda(), + torch.rand(size[1], in_channels[1]).cuda(), + ) + else: + in_channels = 5 + x = torch.rand(size[0], in_channels).cuda() + out_channels = 4 + + kwargs = dict(aggr=aggr, bias=bias, normalize=normalize, root_weight=root_weight) + + conv1 = SAGEConv(in_channels, out_channels, **kwargs).cuda() + conv2 = CuGraphSAGEConv(in_channels, out_channels, **kwargs).cuda() + + in_channels_src = conv2.in_channels_src + with torch.no_grad(): + conv2.lin.weight.data[:, :in_channels_src] = conv1.lin_l.weight.data + if root_weight: + conv2.lin.weight.data[:, in_channels_src:] = conv1.lin_r.weight.data + if bias: + conv2.lin.bias.data[:] = conv1.lin_l.bias.data + + out1 = conv1(x, edge_index) + out2 = conv2(x, csc, max_num_neighbors=max_num_neighbors) + assert torch.allclose(out1, out2, atol=ATOL) + + grad_out = torch.rand_like(out1) + out1.backward(grad_out) + out2.backward(grad_out) + + assert torch.allclose( + conv1.lin_l.weight.grad, + conv2.lin.weight.grad[:, :in_channels_src], + atol=ATOL, + ) + + if root_weight: + 
assert torch.allclose( + conv1.lin_r.weight.grad, + conv2.lin.weight.grad[:, in_channels_src:], + atol=ATOL, + ) + + if bias: + assert torch.allclose( + conv1.lin_l.bias.grad, + conv2.lin.bias.grad, + atol=ATOL, + ) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py index a2153ee7891..7dba1a6d515 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py @@ -13,23 +13,25 @@ import pytest -try: - from torch_geometric.nn import TransformerConv -except ModuleNotFoundError: - pytest.skip("PyG not available", allow_module_level=True) - -from cugraph.utilities.utils import import_optional from cugraph_pyg.nn import TransformerConv as CuGraphTransformerConv -torch = import_optional("torch") +ATOL = 1e-6 @pytest.mark.parametrize("bipartite", [True, False]) @pytest.mark.parametrize("concat", [True, False]) @pytest.mark.parametrize("heads", [1, 2, 3, 5, 10, 16]) -def test_transformer_conv_equality(bipartite, concat, heads): +@pytest.mark.parametrize("graph", ["basic_pyg_graph_1", "basic_pyg_graph_2"]) +def test_transformer_conv_equality(bipartite, concat, heads, graph, request): + pytest.importorskip("torch_geometric", reason="PyG not available") + import torch + from torch_geometric.nn import TransformerConv + + edge_index, size = request.getfixturevalue(graph) + edge_index = edge_index.cuda() + csc = CuGraphTransformerConv.to_csc(edge_index, size) + out_channels = 2 - size = (10, 10) kwargs = dict(concat=concat, bias=False, root_weight=False) if bipartite: @@ -42,14 +44,6 @@ def test_transformer_conv_equality(bipartite, concat, heads): in_channels = 5 x = torch.rand(size[0], in_channels, device="cuda") - edge_index = torch.tensor( - [ - [7, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 8, 9, 3, 4, 5], - [0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 5, 5, 6], - ], - device="cuda", - ) - conv1 = 
TransformerConv(in_channels, out_channels, heads, **kwargs).cuda() conv2 = CuGraphTransformerConv(in_channels, out_channels, heads, **kwargs).cuda() @@ -62,30 +56,27 @@ def test_transformer_conv_equality(bipartite, concat, heads): conv2.lin_value.bias.data = conv1.lin_value.bias.data.detach().clone() out1 = conv1(x, edge_index) - csc = CuGraphTransformerConv.to_csc(edge_index, size) out2 = conv2(x, csc) - atol = 1e-6 - - assert torch.allclose(out1, out2, atol=atol) + assert torch.allclose(out1, out2, atol=ATOL) grad_output = torch.rand_like(out1) out1.backward(grad_output) out2.backward(grad_output) assert torch.allclose( - conv1.lin_query.weight.grad, conv2.lin_query.weight.grad, atol=atol + conv1.lin_query.weight.grad, conv2.lin_query.weight.grad, atol=ATOL ) assert torch.allclose( - conv1.lin_key.weight.grad, conv2.lin_key.weight.grad, atol=atol + conv1.lin_key.weight.grad, conv2.lin_key.weight.grad, atol=ATOL ) assert torch.allclose( - conv1.lin_value.weight.grad, conv2.lin_value.weight.grad, atol=atol + conv1.lin_value.weight.grad, conv2.lin_value.weight.grad, atol=ATOL ) assert torch.allclose( - conv1.lin_query.bias.grad, conv2.lin_query.bias.grad, atol=atol + conv1.lin_query.bias.grad, conv2.lin_query.bias.grad, atol=ATOL ) - assert torch.allclose(conv1.lin_key.bias.grad, conv2.lin_key.bias.grad, atol=atol) + assert torch.allclose(conv1.lin_key.bias.grad, conv2.lin_key.bias.grad, atol=ATOL) assert torch.allclose( - conv1.lin_value.bias.grad, conv2.lin_value.bias.grad, atol=atol + conv1.lin_value.bias.grad, conv2.lin_value.bias.grad, atol=ATOL ) diff --git a/python/cugraph/cugraph/community/louvain.py b/python/cugraph/cugraph/community/louvain.py index 35ca864824f..7f9742c8f09 100644 --- a/python/cugraph/cugraph/community/louvain.py +++ b/python/cugraph/cugraph/community/louvain.py @@ -17,11 +17,13 @@ ) import cudf +import warnings from pylibcugraph import louvain as pylibcugraph_louvain from pylibcugraph import ResourceHandle -def louvain(G, max_iter=100, 
resolution=1.0): +# FIXME: max_level should default to 100 once max_iter is removed +def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7): """ Compute the modularity optimizing partition of the input graph using the Louvain method @@ -40,18 +42,30 @@ def louvain(G, max_iter=100, resolution=1.0): present. The current implementation only supports undirected graphs. - max_iter : integer, optional (default=100) - This controls the maximum number of levels/iterations of the Louvain + max_level : integer, optional (default=100) + This controls the maximum number of levels of the Louvain algorithm. When specified the algorithm will terminate after no more - than the specified number of iterations. No error occurs when the + than the specified number of levels. No error occurs when the algorithm terminates early in this manner. - resolution: float/double, optional (default=1.0) + max_iter : integer, optional (default=None) + This parameter is deprecated in favor of max_level. Previously + it was used to control the maximum number of levels of the Louvain + algorithm. + + resolution: float, optional (default=1.0) Called gamma in the modularity formula, this changes the size of the communities. Higher resolutions lead to more smaller communities, lower resolutions lead to fewer larger communities. Defaults to 1. + threshold: float + Modularity gain threshold for each level. If the gain of + modularity between 2 levels of the algorithm is less than the + given threshold then the algorithm stops and returns the + resulting communities. + Defaults to 1e-7. + Returns ------- parts : cudf.DataFrame @@ -80,10 +94,29 @@ def louvain(G, max_iter=100, resolution=1.0): if G.is_directed(): raise ValueError("input graph must be undirected") + # FIXME: This max_iter logic and the max_level defaulting can be deleted + # in favor of defaulting max_level in call once max_iter is deleted + if max_iter: + if max_level: + raise ValueError( + "max_iter is deprecated. 
Cannot specify both max_iter and max_level" + ) + + warning_msg = ( + "max_iter has been renamed max_level. Use of max_iter is " + "deprecated and will no longer be supported in the next releases." + ) + warnings.warn(warning_msg, FutureWarning) + max_level = max_iter + + if max_level is None: + max_level = 100 + vertex, partition, mod_score = pylibcugraph_louvain( resource_handle=ResourceHandle(), graph=G._plc_graph, - max_level=max_iter, + max_level=max_level, + threshold=threshold, resolution=resolution, do_expensive_check=False, ) diff --git a/python/cugraph/cugraph/dask/common/part_utils.py b/python/cugraph/cugraph/dask/common/part_utils.py index fda7e257367..7c0aad6c3ee 100644 --- a/python/cugraph/cugraph/dask/common/part_utils.py +++ b/python/cugraph/cugraph/dask/common/part_utils.py @@ -73,7 +73,7 @@ def persist_distributed_data(dask_df, client): _keys = dask_df.__dask_keys__() worker_dict = {} for i, key in enumerate(_keys): - worker_dict[str(key)] = tuple([worker_addresses[i]]) + worker_dict[key] = tuple([worker_addresses[i]]) persisted = client.persist(dask_df, workers=worker_dict) parts = futures_of(persisted) return parts @@ -89,7 +89,7 @@ def get_persisted_df_worker_map(dask_df, client): ddf_keys = futures_of(dask_df) output_map = {} for w, w_keys in client.has_what().items(): - output_map[w] = [ddf_k for ddf_k in ddf_keys if str(ddf_k.key) in w_keys] + output_map[w] = [ddf_k for ddf_k in ddf_keys if ddf_k.key in w_keys] if len(output_map[w]) == 0: output_map[w] = _create_empty_dask_df_future(dask_df._meta, client, w) return output_map @@ -157,7 +157,7 @@ async def _extract_partitions( # NOTE: We colocate (X, y) here by zipping delayed # n partitions of them as (X1, y1), (X2, y2)... # and asking client to compute a single future for - # each tuple in the list + # each tuple in the list. 
dela = [np.asarray(d.to_delayed()) for d in dask_obj] # TODO: ravel() is causing strange behavior w/ delayed Arrays which are @@ -167,7 +167,7 @@ async def _extract_partitions( parts = client.compute([p for p in zip(*raveled)]) await wait(parts) - key_to_part = [(str(part.key), part) for part in parts] + key_to_part = [(part.key, part) for part in parts] who_has = await client.who_has(parts) return [(first(who_has[key]), part) for key, part in key_to_part] @@ -229,7 +229,7 @@ def load_balance_func(ddf_, by, client=None): wait(parts) who_has = client.who_has(parts) - key_to_part = [(str(part.key), part) for part in parts] + key_to_part = [(part.key, part) for part in parts] gpu_fututres = [ (first(who_has[key]), part.key[1], part) for key, part in key_to_part ] @@ -245,7 +245,7 @@ def load_balance_func(ddf_, by, client=None): for cumsum in cumsum_parts: num_rows.append(cumsum.iloc[-1]) - # Calculate current partition divisions + # Calculate current partition divisions. divisions = [sum(num_rows[0:x:1]) for x in range(0, len(num_rows) + 1)] divisions[-1] = divisions[-1] - 1 divisions = tuple(divisions) @@ -271,7 +271,7 @@ def load_balance_func(ddf_, by, client=None): def concat_dfs(df_list): """ - Concat a list of cudf dataframes + Concat a list of cudf dataframes. """ return cudf.concat(df_list) @@ -279,17 +279,17 @@ def concat_dfs(df_list): def get_delayed_dict(ddf): """ Returns a dicitionary with the dataframe tasks as keys and - the dataframe delayed objects as values + the dataframe delayed objects as values. """ df_delayed = {} for delayed_obj in ddf.to_delayed(): - df_delayed[str(delayed_obj.key)] = delayed_obj + df_delayed[delayed_obj.key] = delayed_obj return df_delayed def concat_within_workers(client, ddf): """ - Concats all partitions within workers without transfers + Concats all partitions within workers without transfers. 
""" df_delayed = get_delayed_dict(ddf) diff --git a/python/cugraph/cugraph/dask/community/louvain.py b/python/cugraph/cugraph/dask/community/louvain.py index c003939f5eb..8efbbafaf7b 100644 --- a/python/cugraph/cugraph/dask/community/louvain.py +++ b/python/cugraph/cugraph/dask/community/louvain.py @@ -28,6 +28,8 @@ from pylibcugraph import louvain as pylibcugraph_louvain from typing import Tuple, TYPE_CHECKING +import warnings + if TYPE_CHECKING: from cugraph import Graph @@ -45,19 +47,30 @@ def convert_to_cudf(result: cp.ndarray) -> Tuple[cudf.DataFrame, float]: def _call_plc_louvain( - sID: bytes, mg_graph_x, max_iter: int, resolution: int, do_expensive_check: bool + sID: bytes, + mg_graph_x, + max_level: int, + threshold: float, + resolution: float, + do_expensive_check: bool, ) -> Tuple[cp.ndarray, cp.ndarray, float]: return pylibcugraph_louvain( resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), graph=mg_graph_x, - max_level=max_iter, + max_level=max_level, + threshold=threshold, resolution=resolution, do_expensive_check=do_expensive_check, ) +# FIXME: max_level should default to 100 once max_iter is removed def louvain( - input_graph: Graph, max_iter: int = 100, resolution: int = 1.0 + input_graph: Graph, + max_level: int = None, + max_iter: int = None, + resolution: float = 1.0, + threshold: float = 1e-7, ) -> Tuple[dask_cudf.DataFrame, float]: """ Compute the modularity optimizing partition of the input graph using the @@ -77,17 +90,27 @@ def louvain( present. The current implementation only supports undirected graphs. - max_iter : integer, optional (default=100) - This controls the maximum number of levels/iterations of the Louvain + max_level : integer, optional (default=100) + This controls the maximum number of levels of the Louvain algorithm. When specified the algorithm will terminate after no more - than the specified number of iterations. No error occurs when the + than the specified number of levels. 
No error occurs when the algorithm terminates early in this manner. - resolution: float/double, optional (default=1.0) + max_iter : integer, optional (default=None) + This parameter is deprecated in favor of max_level. Previously + it was used to control the maximum number of levels of the Louvain + algorithm. + + resolution: float, optional (default=1.0) Called gamma in the modularity formula, this changes the size of the communities. Higher resolutions lead to more smaller communities, lower resolutions lead to fewer larger communities. - Defaults to 1. + + threshold: float, optional (default=1e-7) + Modularity gain threshold for each level. If the gain of + modularity between 2 levels of the algorithm is less than the + given threshold then the algorithm stops and returns the + resulting communities. Returns ------- @@ -115,6 +138,24 @@ def louvain( if input_graph.is_directed(): raise ValueError("input graph must be undirected") + # FIXME: This max_iter logic and the max_level defaulting can be deleted + # in favor of defaulting max_level in call once max_iter is deleted + if max_iter: + if max_level: + raise ValueError( + "max_iter is deprecated. Cannot specify both max_iter and max_level" + ) + + warning_msg = ( + "max_iter has been renamed max_level. Use of max_iter is " + "deprecated and will no longer be supported in the next releases. 
" + ) + warnings.warn(warning_msg, FutureWarning) + max_level = max_iter + + if max_level is None: + max_level = 100 + # Initialize dask client client = default_client() @@ -125,7 +166,8 @@ def louvain( _call_plc_louvain, Comms.get_session_id(), input_graph._plc_graph[w], - max_iter, + max_level, + threshold, resolution, do_expensive_check, workers=[w], diff --git a/python/cugraph/cugraph/datasets/dataset.py b/python/cugraph/cugraph/datasets/dataset.py index b276a87b88e..877eade7708 100644 --- a/python/cugraph/cugraph/datasets/dataset.py +++ b/python/cugraph/cugraph/datasets/dataset.py @@ -266,6 +266,42 @@ def get_path(self): return self._path.absolute() + def is_directed(self): + """ + Returns True if the graph is a directed graph. + """ + return self.metadata["is_directed"] + + def is_multigraph(self): + """ + Returns True if the graph is a multigraph. + """ + return self.metadata["is_multigraph"] + + def is_symmetric(self): + """ + Returns True if the graph is symmetric. + """ + return self.metadata["is_symmetric"] + + def number_of_nodes(self): + """ + An alias of number_of_vertices() + """ + return self.number_of_vertices() + + def number_of_vertices(self): + """ + Get the number of vertices in the graph. + """ + return self.metadata["number_of_nodes"] + + def number_of_edges(self): + """ + Get the number of edges in the graph. 
+ """ + return self.metadata["number_of_edges"] + def download_all(force=False): """ diff --git a/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py b/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py index 9d9572b88b2..a8d3e2fc7ec 100644 --- a/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py +++ b/python/cugraph/cugraph/tests/link_analysis/test_pagerank.py @@ -65,7 +65,7 @@ def cugraph_call(G, max_iter, tol, alpha, personalization, nstart, pre_vtx_o_wgt # need a different function since the Nx version returns a dictionary -def cugraph_nx_call(G, max_iter, tol, alpha, personalization, nstart): +def nx_cugraph_call(G, max_iter, tol, alpha, personalization, nstart): # cugraph Pagerank Call t1 = time.time() pr = cugraph.pagerank( @@ -238,7 +238,7 @@ def test_pagerank_nx(graph_file, max_iter, tol, alpha, personalization_perc, has cu_prsn = cudify(networkx_prsn) # cuGraph PageRank with Nx Graph - cugraph_pr = cugraph_nx_call(Gnx, max_iter, tol, alpha, cu_prsn, cu_nstart) + cugraph_pr = nx_cugraph_call(Gnx, max_iter, tol, alpha, cu_prsn, cu_nstart) # Calculating mismatch networkx_pr = sorted(networkx_pr.items(), key=lambda x: x[0]) diff --git a/python/cugraph/cugraph/tests/utils/test_dataset.py b/python/cugraph/cugraph/tests/utils/test_dataset.py index 643d0468d46..c2a4f7c6072 100644 --- a/python/cugraph/cugraph/tests/utils/test_dataset.py +++ b/python/cugraph/cugraph/tests/utils/test_dataset.py @@ -328,6 +328,16 @@ def test_is_multigraph(dataset): assert G.is_multigraph() == dataset.metadata["is_multigraph"] +@pytest.mark.parametrize("dataset", ALL_DATASETS) +def test_object_getters(dataset): + assert dataset.is_directed() == dataset.metadata["is_directed"] + assert dataset.is_multigraph() == dataset.metadata["is_multigraph"] + assert dataset.is_symmetric() == dataset.metadata["is_symmetric"] + assert dataset.number_of_nodes() == dataset.metadata["number_of_nodes"] + assert dataset.number_of_vertices() == dataset.metadata["number_of_nodes"] 
+ assert dataset.number_of_edges() == dataset.metadata["number_of_edges"] + + # # Test experimental for DeprecationWarnings # diff --git a/python/cugraph-nx/.flake8 b/python/nx-cugraph/.flake8 similarity index 88% rename from python/cugraph-nx/.flake8 rename to python/nx-cugraph/.flake8 index f66815e8507..3a2e3fb8617 100644 --- a/python/cugraph-nx/.flake8 +++ b/python/nx-cugraph/.flake8 @@ -9,5 +9,5 @@ extend-ignore = SIM401, # E203 whitespace before ':' (to be compatible with black) per-file-ignores = - cugraph_nx/tests/*.py:T201, + nx_cugraph/tests/*.py:T201, __init__.py:F401,F403, diff --git a/python/cugraph-nx/LICENSE b/python/nx-cugraph/LICENSE similarity index 100% rename from python/cugraph-nx/LICENSE rename to python/nx-cugraph/LICENSE diff --git a/python/cugraph-nx/Makefile b/python/nx-cugraph/Makefile similarity index 100% rename from python/cugraph-nx/Makefile rename to python/nx-cugraph/Makefile diff --git a/python/cugraph-nx/README.md b/python/nx-cugraph/README.md similarity index 72% rename from python/cugraph-nx/README.md rename to python/nx-cugraph/README.md index 2137fdb6472..e7cd26218e6 100644 --- a/python/cugraph-nx/README.md +++ b/python/nx-cugraph/README.md @@ -1,24 +1,24 @@ -# cugraph-nx +# nx-cugraph ## Description -[RAPIDS](https://rapids.ai) cugraph-nx is a [backend to NetworkX](https://networkx.org/documentation/stable/reference/classes/index.html#backends) +[RAPIDS](https://rapids.ai) nx-cugraph is a [backend to NetworkX](https://networkx.org/documentation/stable/reference/classes/index.html#backends) with minimal dependencies (`networkx`, `cupy`, and `pylibcugraph`) to run graph algorithms on the GPU. 
### Contribute Follow instructions for [contributing to cugraph](https://github.com/rapidsai/cugraph/blob/branch-23.10/readme_pages/CONTRIBUTING.md) -and [building from source](https://docs.rapids.ai/api/cugraph/stable/installation/source_build/), then build cugraph-nx in develop (i.e., editable) mode: +and [building from source](https://docs.rapids.ai/api/cugraph/stable/installation/source_build/), then build nx-cugraph in develop (i.e., editable) mode: ``` -$ ./build.sh cugraph-nx --pydevelop +$ ./build.sh nx-cugraph --pydevelop ``` ### Run tests -Run cugraph-nx tests from `cugraph/python/cugraph-nx` directory: +Run nx-cugraph tests from `cugraph/python/nx-cugraph` directory: ``` $ pytest ``` -Run cugraph-nx benchmarks: +Run nx-cugraph benchmarks: ``` $ pytest --bench ``` diff --git a/python/cugraph-nx/conftest.py b/python/nx-cugraph/conftest.py similarity index 100% rename from python/cugraph-nx/conftest.py rename to python/nx-cugraph/conftest.py diff --git a/python/cugraph-nx/lint.yaml b/python/nx-cugraph/lint.yaml similarity index 98% rename from python/cugraph-nx/lint.yaml rename to python/nx-cugraph/lint.yaml index 42c1b9657c7..dba061bd6b5 100644 --- a/python/cugraph-nx/lint.yaml +++ b/python/nx-cugraph/lint.yaml @@ -74,7 +74,7 @@ repos: - id: codespell types_or: [python, rst, markdown] additional_dependencies: [tomli] - files: ^(cugraph_nx|docs)/ + files: ^(nx_cugraph|docs)/ - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.0.286 hooks: diff --git a/python/cugraph-nx/cugraph_nx/__init__.py b/python/nx-cugraph/nx_cugraph/__init__.py similarity index 100% rename from python/cugraph-nx/cugraph_nx/__init__.py rename to python/nx-cugraph/nx_cugraph/__init__.py diff --git a/python/cugraph-nx/cugraph_nx/algorithms/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py similarity index 100% rename from python/cugraph-nx/cugraph_nx/algorithms/__init__.py rename to python/nx-cugraph/nx_cugraph/algorithms/__init__.py diff --git 
a/python/cugraph-nx/cugraph_nx/algorithms/centrality/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/centrality/__init__.py similarity index 100% rename from python/cugraph-nx/cugraph_nx/algorithms/centrality/__init__.py rename to python/nx-cugraph/nx_cugraph/algorithms/centrality/__init__.py diff --git a/python/cugraph-nx/cugraph_nx/algorithms/centrality/betweenness.py b/python/nx-cugraph/nx_cugraph/algorithms/centrality/betweenness.py similarity index 94% rename from python/cugraph-nx/cugraph_nx/algorithms/centrality/betweenness.py rename to python/nx-cugraph/nx_cugraph/algorithms/centrality/betweenness.py index a5962a52865..b777919f86f 100644 --- a/python/cugraph-nx/cugraph_nx/algorithms/centrality/betweenness.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/centrality/betweenness.py @@ -12,8 +12,8 @@ # limitations under the License. import pylibcugraph as plc -from cugraph_nx.convert import _to_graph -from cugraph_nx.utils import networkx_algorithm +from nx_cugraph.convert import _to_graph +from nx_cugraph.utils import _handle_seed, networkx_algorithm __all__ = ["betweenness_centrality", "edge_betweenness_centrality"] @@ -26,6 +26,7 @@ def betweenness_centrality( raise NotImplementedError( "Weighted implementation of betweenness centrality not currently supported" ) + seed = _handle_seed(seed) G = _to_graph(G, weight) node_ids, values = plc.betweenness_centrality( resource_handle=plc.ResourceHandle(), diff --git a/python/cugraph-nx/cugraph_nx/algorithms/community/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/community/__init__.py similarity index 100% rename from python/cugraph-nx/cugraph_nx/algorithms/community/__init__.py rename to python/nx-cugraph/nx_cugraph/algorithms/community/__init__.py diff --git a/python/cugraph-nx/cugraph_nx/algorithms/community/louvain.py b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py similarity index 89% rename from python/cugraph-nx/cugraph_nx/algorithms/community/louvain.py rename to 
python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py index 476f7428aab..ca5f05c2014 100644 --- a/python/cugraph-nx/cugraph_nx/algorithms/community/louvain.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py @@ -14,8 +14,13 @@ import pylibcugraph as plc -from cugraph_nx.convert import _to_undirected_graph -from cugraph_nx.utils import _groupby, networkx_algorithm, not_implemented_for +from nx_cugraph.convert import _to_undirected_graph +from nx_cugraph.utils import ( + _groupby, + _handle_seed, + networkx_algorithm, + not_implemented_for, +) __all__ = ["louvain_communities"] @@ -30,6 +35,7 @@ def louvain_communities( Extra parameter: `max_level` controls the maximum number of levels of the algorithm. """ # NetworkX allows both directed and undirected, but cugraph only allows undirected. + seed = _handle_seed(seed) # Unused, but ensure it's valid for future compatibility G = _to_undirected_graph(G, weight) if G.row_indices.size == 0: # TODO: PLC doesn't handle empty graphs gracefully! 
diff --git a/python/cugraph-nx/cugraph_nx/classes/__init__.py b/python/nx-cugraph/nx_cugraph/classes/__init__.py similarity index 100% rename from python/cugraph-nx/cugraph_nx/classes/__init__.py rename to python/nx-cugraph/nx_cugraph/classes/__init__.py diff --git a/python/cugraph-nx/cugraph_nx/classes/digraph.py b/python/nx-cugraph/nx_cugraph/classes/digraph.py similarity index 92% rename from python/cugraph-nx/cugraph_nx/classes/digraph.py rename to python/nx-cugraph/nx_cugraph/classes/digraph.py index 0cea08f3e77..0aaf88fd793 100644 --- a/python/cugraph-nx/cugraph_nx/classes/digraph.py +++ b/python/nx-cugraph/nx_cugraph/classes/digraph.py @@ -16,16 +16,16 @@ import networkx as nx -import cugraph_nx as cnx +import nx_cugraph as nxcg from .graph import Graph if TYPE_CHECKING: - from cugraph_nx.typing import NodeKey + from nx_cugraph.typing import NodeKey __all__ = ["DiGraph"] -networkx_api = cnx.utils.decorators.networkx_class(nx.DiGraph) +networkx_api = nxcg.utils.decorators.networkx_class(nx.DiGraph) class DiGraph(Graph): diff --git a/python/cugraph-nx/cugraph_nx/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py similarity index 97% rename from python/cugraph-nx/cugraph_nx/classes/graph.py rename to python/nx-cugraph/nx_cugraph/classes/graph.py index 5604f2457f8..1432f68c752 100644 --- a/python/cugraph-nx/cugraph_nx/classes/graph.py +++ b/python/nx-cugraph/nx_cugraph/classes/graph.py @@ -21,12 +21,12 @@ import numpy as np import pylibcugraph as plc -import cugraph_nx as cnx +import nx_cugraph as nxcg if TYPE_CHECKING: from collections.abc import Iterable, Iterator - from cugraph_nx.typing import ( + from nx_cugraph.typing import ( AttrKey, Dtype, EdgeTuple, @@ -38,11 +38,11 @@ __all__ = ["Graph"] -networkx_api = cnx.utils.decorators.networkx_class(nx.Graph) +networkx_api = nxcg.utils.decorators.networkx_class(nx.Graph) class Graph: - # Tell networkx to dispatch calls with this object to cugraph-nx + # Tell networkx to dispatch calls with this 
object to nx-cugraph __networkx_plugin__: ClassVar[str] = "cugraph" # networkx properties @@ -248,7 +248,7 @@ def __new__(cls, incoming_graph_data=None, **attr) -> Graph: elif incoming_graph_data.__class__ is new_graph.__class__: new_graph = incoming_graph_data.copy() elif incoming_graph_data.__class__ is new_graph.to_networkx_class(): - new_graph = cnx.from_networkx(incoming_graph_data, preserve_all_attrs=True) + new_graph = nxcg.from_networkx(incoming_graph_data, preserve_all_attrs=True) else: raise NotImplementedError new_graph.graph.update(attr) @@ -270,8 +270,8 @@ def is_multigraph(cls) -> bool: @classmethod @networkx_api - def to_directed_class(cls) -> type[cnx.DiGraph]: - return cnx.DiGraph + def to_directed_class(cls) -> type[nxcg.DiGraph]: + return nxcg.DiGraph @classmethod def to_networkx_class(cls) -> type[nx.Graph]: @@ -428,7 +428,7 @@ def size(self, weight: AttrKey | None = None) -> int: return int((self.row_indices <= self.col_indices).sum()) @networkx_api - def to_directed(self, as_view: bool = False) -> cnx.DiGraph: + def to_directed(self, as_view: bool = False) -> nxcg.DiGraph: return self._copy(as_view, self.to_directed_class()) @networkx_api diff --git a/python/cugraph-nx/cugraph_nx/convert.py b/python/nx-cugraph/nx_cugraph/convert.py similarity index 94% rename from python/cugraph-nx/cugraph_nx/convert.py rename to python/nx-cugraph/nx_cugraph/convert.py index 530dd700f35..9be8cac7877 100644 --- a/python/cugraph-nx/cugraph_nx/convert.py +++ b/python/nx-cugraph/nx_cugraph/convert.py @@ -22,10 +22,10 @@ import networkx as nx import numpy as np -import cugraph_nx as cnx +import nx_cugraph as nxcg if TYPE_CHECKING: - from cugraph_nx.typing import AttrKey, Dtype, EdgeValue, NodeValue + from nx_cugraph.typing import AttrKey, Dtype, EdgeValue, NodeValue __all__ = [ "from_networkx", @@ -51,8 +51,8 @@ def from_networkx( as_directed: bool = False, name: str | None = None, graph_name: str | None = None, -) -> cnx.Graph: - """Convert a networkx graph to 
cugraph_nx graph; can convert all attributes. +) -> nxcg.Graph: + """Convert a networkx graph to nx_cugraph graph; can convert all attributes. Parameters ---------- @@ -61,7 +61,7 @@ def from_networkx( Dict that maps edge attributes to default values if missing in ``G``. If None, then no edge attributes will be converted. If default value is None, then missing values are handled with a mask. - A default value of ``cnx.convert.REQUIRED`` or ``...`` indicates that + A default value of ``nxcg.convert.REQUIRED`` or ``...`` indicates that all edges have data for this attribute, and raise `KeyError` if not. For convenience, `edge_attrs` may be a single attribute with default 1; for example ``edge_attrs="weight"``. @@ -70,7 +70,7 @@ def from_networkx( Dict that maps node attributes to default values if missing in ``G``. If None, then no node attributes will be converted. If default value is None, then missing values are handled with a mask. - A default value of ``cnx.convert.REQUIRED`` or ``...`` indicates that + A default value of ``nxcg.convert.REQUIRED`` or ``...`` indicates that all nodes have data for this attribute, and raise `KeyError` if not. For convenience, `node_attrs` may be a single attribute with no default; for example ``node_attrs="weight"``. @@ -94,7 +94,7 @@ def from_networkx( Returns ------- - cugraph_nx.Graph + nx_cugraph.Graph Notes ----- @@ -111,7 +111,7 @@ def from_networkx( See Also -------- - to_networkx : The opposite; convert cugraph_nx graph to networkx graph + to_networkx : The opposite; convert nx_cugraph graph to networkx graph """ # This uses `graph._adj` and `graph._node`, which are private attributes in NetworkX if not isinstance(graph, nx.Graph): @@ -352,9 +352,9 @@ def from_networkx( # if vals.ndim > 1: ... 
if graph.is_directed() or as_directed: - klass = cnx.DiGraph + klass = nxcg.DiGraph else: - klass = cnx.Graph + klass = nxcg.Graph rv = klass.from_coo( N, row_indices, @@ -398,14 +398,14 @@ def _iter_attr_dicts( return full_dicts -def to_networkx(G: cnx.Graph) -> nx.Graph: - """Convert a cugraph_nx graph to networkx graph. +def to_networkx(G: nxcg.Graph) -> nx.Graph: + """Convert a nx_cugraph graph to networkx graph. All edge and node attributes and ``G.graph`` properties are converted. Parameters ---------- - G : cugraph_nx.Graph + G : nx_cugraph.Graph Returns ------- @@ -413,7 +413,7 @@ def to_networkx(G: cnx.Graph) -> nx.Graph: See Also -------- - from_networkx : The opposite; convert networkx graph to cugraph_nx graph + from_networkx : The opposite; convert networkx graph to nx_cugraph graph """ rv = G.to_networkx_class()() id_to_key = G.id_to_key @@ -463,13 +463,13 @@ def _to_graph( edge_attr: AttrKey | None = None, edge_default: EdgeValue | None = 1, edge_dtype: Dtype | None = None, -) -> cnx.Graph | cnx.DiGraph: - """Ensure that input type is a cugraph_nx graph, and convert if necessary. +) -> nxcg.Graph | nxcg.DiGraph: + """Ensure that input type is a nx_cugraph graph, and convert if necessary. Directed and undirected graphs are both allowed. This is an internal utility function and may change or be removed. """ - if isinstance(G, cnx.Graph): + if isinstance(G, nxcg.Graph): return G if isinstance(G, nx.Graph): return from_networkx( @@ -484,15 +484,15 @@ def _to_directed_graph( edge_attr: AttrKey | None = None, edge_default: EdgeValue | None = 1, edge_dtype: Dtype | None = None, -) -> cnx.DiGraph: - """Ensure that input type is a cugraph_nx DiGraph, and convert if necessary. +) -> nxcg.DiGraph: + """Ensure that input type is a nx_cugraph DiGraph, and convert if necessary. Undirected graphs will be converted to directed. This is an internal utility function and may change or be removed. 
""" - if isinstance(G, cnx.DiGraph): + if isinstance(G, nxcg.DiGraph): return G - if isinstance(G, cnx.Graph): + if isinstance(G, nxcg.Graph): return G.to_directed() if isinstance(G, nx.Graph): return from_networkx( @@ -510,13 +510,13 @@ def _to_undirected_graph( edge_attr: AttrKey | None = None, edge_default: EdgeValue | None = 1, edge_dtype: Dtype | None = None, -) -> cnx.Graph: - """Ensure that input type is a cugraph_nx Graph, and convert if necessary. +) -> nxcg.Graph: + """Ensure that input type is a nx_cugraph Graph, and convert if necessary. Only undirected graphs are allowed. Directed graphs will raise ValueError. This is an internal utility function and may change or be removed. """ - if isinstance(G, cnx.Graph): + if isinstance(G, nxcg.Graph): if G.is_directed(): raise ValueError("Only undirected graphs supported; got a directed graph") return G diff --git a/python/cugraph-nx/cugraph_nx/interface.py b/python/nx-cugraph/nx_cugraph/interface.py similarity index 97% rename from python/cugraph-nx/cugraph_nx/interface.py rename to python/nx-cugraph/nx_cugraph/interface.py index 198fdd09cfc..cc750cd2d5b 100644 --- a/python/cugraph-nx/cugraph_nx/interface.py +++ b/python/nx-cugraph/nx_cugraph/interface.py @@ -14,7 +14,7 @@ import networkx as nx -import cugraph_nx as cnx +import nx_cugraph as nxcg class BackendInterface: @@ -29,12 +29,12 @@ def convert_from_nx(graph, *args, edge_attrs=None, weight=None, **kwargs): "edge_attrs and weight arguments should not both be given" ) edge_attrs = {weight: 1} - return cnx.from_networkx(graph, *args, edge_attrs=edge_attrs, **kwargs) + return nxcg.from_networkx(graph, *args, edge_attrs=edge_attrs, **kwargs) @staticmethod def convert_to_nx(obj, *, name: str | None = None): - if isinstance(obj, cnx.Graph): - return cnx.to_networkx(obj) + if isinstance(obj, nxcg.Graph): + return nxcg.to_networkx(obj) return obj @staticmethod diff --git a/python/cugraph-nx/cugraph_nx/tests/__init__.py 
b/python/nx-cugraph/nx_cugraph/tests/__init__.py similarity index 100% rename from python/cugraph-nx/cugraph_nx/tests/__init__.py rename to python/nx-cugraph/nx_cugraph/tests/__init__.py diff --git a/python/cugraph-nx/cugraph_nx/tests/bench_convert.py b/python/nx-cugraph/nx_cugraph/tests/bench_convert.py similarity index 92% rename from python/cugraph-nx/cugraph_nx/tests/bench_convert.py rename to python/nx-cugraph/nx_cugraph/tests/bench_convert.py index 85ef66ac918..7e6278661c2 100644 --- a/python/cugraph-nx/cugraph_nx/tests/bench_convert.py +++ b/python/nx-cugraph/nx_cugraph/tests/bench_convert.py @@ -16,7 +16,7 @@ import numpy as np import pytest -import cugraph_nx as cnx +import nx_cugraph as nxcg try: import cugraph @@ -50,19 +50,22 @@ def _bench_helper(gpubenchmark, N, attr_kind, create_using, method): continue edgedict["x"] = random.randint(0, 100000) if attr_kind == "preserve": - gpubenchmark(cnx.from_networkx, G, preserve_edge_attrs=True) + gpubenchmark(nxcg.from_networkx, G, preserve_edge_attrs=True) elif attr_kind == "half_missing": - gpubenchmark(cnx.from_networkx, G, edge_attrs={"x": None}) + gpubenchmark(nxcg.from_networkx, G, edge_attrs={"x": None}) elif attr_kind == "required": - gpubenchmark(cnx.from_networkx, G, edge_attrs={"x": ...}) + gpubenchmark(nxcg.from_networkx, G, edge_attrs={"x": ...}) elif attr_kind == "required_dtype": gpubenchmark( - cnx.from_networkx, G, edge_attrs={"x": ...}, edge_dtypes={"x": np.int32} + nxcg.from_networkx, + G, + edge_attrs={"x": ...}, + edge_dtypes={"x": np.int32}, ) else: # full, half_default - gpubenchmark(cnx.from_networkx, G, edge_attrs={"x": 0}) + gpubenchmark(nxcg.from_networkx, G, edge_attrs={"x": 0}) else: - gpubenchmark(cnx.from_networkx, G) + gpubenchmark(nxcg.from_networkx, G) def _bench_helper_cugraph( diff --git a/python/cugraph-nx/cugraph_nx/tests/conftest.py b/python/nx-cugraph/nx_cugraph/tests/conftest.py similarity index 100% rename from python/cugraph-nx/cugraph_nx/tests/conftest.py rename to 
python/nx-cugraph/nx_cugraph/tests/conftest.py diff --git a/python/nx-cugraph/nx_cugraph/tests/test_convert.py b/python/nx-cugraph/nx_cugraph/tests/test_convert.py new file mode 100644 index 00000000000..ba3cd7aaee1 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/test_convert.py @@ -0,0 +1,203 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import cupy as cp +import networkx as nx +import pytest + +import nx_cugraph as nxcg +from nx_cugraph import interface + + +@pytest.mark.parametrize("graph_class", [nx.Graph, nx.DiGraph]) +@pytest.mark.parametrize( + "kwargs", + [ + {}, + {"preserve_edge_attrs": True}, + {"preserve_node_attrs": True}, + {"preserve_all_attrs": True}, + {"edge_attrs": {"x": 0}}, + {"edge_attrs": {"x": None}}, + {"edge_attrs": {"x": nxcg.convert.REQUIRED}}, + {"edge_attrs": {"x": ...}}, # sugar for REQUIRED + {"edge_attrs": "x"}, + {"node_attrs": {"x": 0}}, + {"node_attrs": {"x": None}}, + {"node_attrs": {"x": nxcg.convert.REQUIRED}}, + {"node_attrs": {"x": ...}}, # sugar for REQUIRED + {"node_attrs": "x"}, + ], +) +def test_convert_empty(graph_class, kwargs): + G = graph_class() + Gcg = nxcg.from_networkx(G, **kwargs) + H = nxcg.to_networkx(Gcg) + assert G.number_of_nodes() == Gcg.number_of_nodes() == H.number_of_nodes() == 0 + assert G.number_of_edges() == Gcg.number_of_edges() == H.number_of_edges() == 0 + assert Gcg.edge_values == Gcg.edge_masks == Gcg.node_values == Gcg.node_masks == {} + assert 
G.graph == Gcg.graph == H.graph == {} + + +def test_convert(): + # FIXME: can we break this into smaller tests? + G = nx.Graph() + G.add_edge(0, 1, x=2) + G.add_node(0, foo=10) + G.add_node(1, foo=20, bar=100) + for kwargs in [ + {"preserve_edge_attrs": True}, + {"preserve_all_attrs": True}, + {"edge_attrs": {"x": 0}}, + {"edge_attrs": {"x": None}, "node_attrs": {"bar": None}}, + {"edge_attrs": "x", "edge_dtypes": int}, + { + "edge_attrs": {"x": nxcg.convert.REQUIRED}, + "node_attrs": {"foo": nxcg.convert.REQUIRED}, + }, + {"edge_attrs": {"x": ...}, "node_attrs": {"foo": ...}}, # sugar for REQUIRED + ]: + # All edges have "x" attribute, so all kwargs are equivalent + Gcg = nxcg.from_networkx(G, **kwargs) + cp.testing.assert_array_equal(Gcg.row_indices, [0, 1]) + cp.testing.assert_array_equal(Gcg.col_indices, [1, 0]) + cp.testing.assert_array_equal(Gcg.edge_values["x"], [2, 2]) + assert len(Gcg.edge_values) == 1 + assert Gcg.edge_masks == {} + H = nxcg.to_networkx(Gcg) + assert G.number_of_nodes() == Gcg.number_of_nodes() == H.number_of_nodes() == 2 + assert G.number_of_edges() == Gcg.number_of_edges() == H.number_of_edges() == 1 + assert G.adj == H.adj + + with pytest.raises(KeyError, match="bar"): + nxcg.from_networkx(G, node_attrs={"bar": ...}) + + # Structure-only graph (no edge attributes) + Gcg = nxcg.from_networkx(G, preserve_node_attrs=True) + cp.testing.assert_array_equal(Gcg.row_indices, [0, 1]) + cp.testing.assert_array_equal(Gcg.col_indices, [1, 0]) + cp.testing.assert_array_equal(Gcg.node_values["foo"], [10, 20]) + assert Gcg.edge_values == Gcg.edge_masks == {} + H = nxcg.to_networkx(Gcg) + assert set(G.edges) == set(H.edges) == {(0, 1)} + assert G.nodes == H.nodes + + # Fill completely missing attribute with default value + Gcg = nxcg.from_networkx(G, edge_attrs={"y": 0}) + cp.testing.assert_array_equal(Gcg.row_indices, [0, 1]) + cp.testing.assert_array_equal(Gcg.col_indices, [1, 0]) + cp.testing.assert_array_equal(Gcg.edge_values["y"], [0, 0]) + 
assert len(Gcg.edge_values) == 1 + assert Gcg.edge_masks == Gcg.node_values == Gcg.node_masks == {} + H = nxcg.to_networkx(Gcg) + assert list(H.edges(data=True)) == [(0, 1, {"y": 0})] + + # If attribute is completely missing (and no default), then just ignore it + Gcg = nxcg.from_networkx(G, edge_attrs={"y": None}) + cp.testing.assert_array_equal(Gcg.row_indices, [0, 1]) + cp.testing.assert_array_equal(Gcg.col_indices, [1, 0]) + assert sorted(Gcg.edge_values) == sorted(Gcg.edge_masks) == [] + H = nxcg.to_networkx(Gcg) + assert list(H.edges(data=True)) == [(0, 1, {})] + + G.add_edge(0, 2) + # Some edges are missing 'x' attribute; need to use a mask + for kwargs in [{"preserve_edge_attrs": True}, {"edge_attrs": {"x": None}}]: + Gcg = nxcg.from_networkx(G, **kwargs) + cp.testing.assert_array_equal(Gcg.row_indices, [0, 0, 1, 2]) + cp.testing.assert_array_equal(Gcg.col_indices, [1, 2, 0, 0]) + assert sorted(Gcg.edge_values) == sorted(Gcg.edge_masks) == ["x"] + cp.testing.assert_array_equal(Gcg.edge_masks["x"], [True, False, True, False]) + cp.testing.assert_array_equal(Gcg.edge_values["x"][Gcg.edge_masks["x"]], [2, 2]) + H = nxcg.to_networkx(Gcg) + assert list(H.edges(data=True)) == [(0, 1, {"x": 2}), (0, 2, {})] + + with pytest.raises(KeyError, match="x"): + nxcg.from_networkx(G, edge_attrs={"x": nxcg.convert.REQUIRED}) + with pytest.raises(KeyError, match="x"): + nxcg.from_networkx(G, edge_attrs={"x": ...}) + with pytest.raises(KeyError, match="bar"): + nxcg.from_networkx(G, node_attrs={"bar": nxcg.convert.REQUIRED}) + with pytest.raises(KeyError, match="bar"): + nxcg.from_networkx(G, node_attrs={"bar": ...}) + + # Now for something more complicated... 
+ G = nx.Graph() + G.add_edge(10, 20, x=1) + G.add_edge(10, 30, x=2, y=1.5) + G.add_node(10, foo=100) + G.add_node(20, foo=200, bar=1000) + G.add_node(30, foo=300) + # Some edges have masks, some don't + for kwargs in [ + {"preserve_edge_attrs": True}, + {"preserve_all_attrs": True}, + {"edge_attrs": {"x": None, "y": None}}, + {"edge_attrs": {"x": 0, "y": None}}, + {"edge_attrs": {"x": 0, "y": None}}, + {"edge_attrs": {"x": 0, "y": None}, "edge_dtypes": {"x": int, "y": float}}, + ]: + Gcg = nxcg.from_networkx(G, **kwargs) + assert Gcg.id_to_key == {0: 10, 1: 20, 2: 30} # Remap node IDs to 0, 1, ... + cp.testing.assert_array_equal(Gcg.row_indices, [0, 0, 1, 2]) + cp.testing.assert_array_equal(Gcg.col_indices, [1, 2, 0, 0]) + cp.testing.assert_array_equal(Gcg.edge_values["x"], [1, 2, 1, 2]) + assert sorted(Gcg.edge_masks) == ["y"] + cp.testing.assert_array_equal(Gcg.edge_masks["y"], [False, True, False, True]) + cp.testing.assert_array_equal( + Gcg.edge_values["y"][Gcg.edge_masks["y"]], [1.5, 1.5] + ) + H = nxcg.to_networkx(Gcg) + assert G.adj == H.adj + + # Some nodes have masks, some don't + for kwargs in [ + {"preserve_node_attrs": True}, + {"preserve_all_attrs": True}, + {"node_attrs": {"foo": None, "bar": None}}, + {"node_attrs": {"foo": None, "bar": None}}, + {"node_attrs": {"foo": 0, "bar": None, "missing": None}}, + ]: + Gcg = nxcg.from_networkx(G, **kwargs) + assert Gcg.id_to_key == {0: 10, 1: 20, 2: 30} # Remap node IDs to 0, 1, ... 
+ cp.testing.assert_array_equal(Gcg.row_indices, [0, 0, 1, 2]) + cp.testing.assert_array_equal(Gcg.col_indices, [1, 2, 0, 0]) + cp.testing.assert_array_equal(Gcg.node_values["foo"], [100, 200, 300]) + assert sorted(Gcg.node_masks) == ["bar"] + cp.testing.assert_array_equal(Gcg.node_masks["bar"], [False, True, False]) + cp.testing.assert_array_equal( + Gcg.node_values["bar"][Gcg.node_masks["bar"]], [1000] + ) + H = nxcg.to_networkx(Gcg) + assert G.nodes == H.nodes + + # Check default values for nodes + for kwargs in [ + {"node_attrs": {"foo": None, "bar": 0}}, + {"node_attrs": {"foo": None, "bar": 0, "missing": None}}, + {"node_attrs": {"bar": 0}}, + {"node_attrs": {"bar": 0}, "node_dtypes": {"bar": int}}, + {"node_attrs": {"bar": 0, "foo": None}, "node_dtypes": int}, + ]: + Gcg = nxcg.from_networkx(G, **kwargs) + assert Gcg.id_to_key == {0: 10, 1: 20, 2: 30} # Remap node IDs to 0, 1, ... + cp.testing.assert_array_equal(Gcg.row_indices, [0, 0, 1, 2]) + cp.testing.assert_array_equal(Gcg.col_indices, [1, 2, 0, 0]) + cp.testing.assert_array_equal(Gcg.node_values["bar"], [0, 1000, 0]) + assert Gcg.node_masks == {} + + with pytest.raises( + TypeError, match="edge_attrs and weight arguments should not both be given" + ): + interface.BackendInterface.convert_from_nx(G, edge_attrs={"x": 1}, weight="x") + with pytest.raises(TypeError, match="Expected networkx.Graph"): + nxcg.from_networkx({}) diff --git a/python/cugraph-nx/cugraph_nx/tests/test_match_api.py b/python/nx-cugraph/nx_cugraph/tests/test_match_api.py similarity index 75% rename from python/cugraph-nx/cugraph_nx/tests/test_match_api.py rename to python/nx-cugraph/nx_cugraph/tests/test_match_api.py index 918c18b4ce3..64d3704dd65 100644 --- a/python/cugraph-nx/cugraph_nx/tests/test_match_api.py +++ b/python/nx-cugraph/nx_cugraph/tests/test_match_api.py @@ -15,13 +15,13 @@ import networkx as nx -import cugraph_nx as cnx -from cugraph_nx.utils import networkx_algorithm +import nx_cugraph as nxcg +from nx_cugraph.utils 
import networkx_algorithm def test_match_signature_and_names(): """Simple test to ensure our signatures and basic module layout match networkx.""" - for name, func in vars(cnx.interface.BackendInterface).items(): + for name, func in vars(nxcg.interface.BackendInterface).items(): if not isinstance(func, networkx_algorithm): continue @@ -44,7 +44,7 @@ def test_match_signature_and_names(): if not func.extra_params: assert orig_sig == func_sig else: - # Ignore extra parameters added to cugraph-nx algorithm + # Ignore extra parameters added to nx-cugraph algorithm assert orig_sig == func_sig.replace( parameters=[ p @@ -52,7 +52,7 @@ def test_match_signature_and_names(): if name not in func.extra_params ] ) - if func.can_run is not cnx.utils.decorators._default_can_run: + if func.can_run is not nxcg.utils.decorators._default_can_run: assert func_sig == inspect.signature(func.can_run) # Matching function names? @@ -74,33 +74,33 @@ def test_match_signature_and_names(): ) # Matching package layout (i.e., which modules have the function)? - cnx_path = func.__module__ + nxcg_path = func.__module__ name = func.__name__ - while "." in cnx_path: + while "." in nxcg_path: # This only walks up the module tree and does not check sibling modules - cnx_path, mod_name = cnx_path.rsplit(".", 1) - nx_path = cnx_path.replace("cugraph_nx", "networkx") - cnx_mod = importlib.import_module(cnx_path) + nxcg_path, mod_name = nxcg_path.rsplit(".", 1) + nx_path = nxcg_path.replace("nx_cugraph", "networkx") + nxcg_mod = importlib.import_module(nxcg_path) nx_mod = importlib.import_module(nx_path) # Is the function present in the current module? 
- present_in_cnx = hasattr(cnx_mod, name) + present_in_nxcg = hasattr(nxcg_mod, name) present_in_nx = hasattr(nx_mod, name) - if present_in_cnx is not present_in_nx: # pragma: no cover (debug) - if present_in_cnx: + if present_in_nxcg is not present_in_nx: # pragma: no cover (debug) + if present_in_nxcg: raise AssertionError( - f"{name} exists in {cnx_path}, but not in {nx_path}" + f"{name} exists in {nxcg_path}, but not in {nx_path}" ) raise AssertionError( - f"{name} exists in {nx_path}, but not in {cnx_path}" + f"{name} exists in {nx_path}, but not in {nxcg_path}" ) # Is the nested module present in the current module? - present_in_cnx = hasattr(cnx_mod, mod_name) + present_in_nxcg = hasattr(nxcg_mod, mod_name) present_in_nx = hasattr(nx_mod, mod_name) - if present_in_cnx is not present_in_nx: # pragma: no cover (debug) - if present_in_cnx: + if present_in_nxcg is not present_in_nx: # pragma: no cover (debug) + if present_in_nxcg: raise AssertionError( - f"{mod_name} exists in {cnx_path}, but not in {nx_path}" + f"{mod_name} exists in {nxcg_path}, but not in {nx_path}" ) raise AssertionError( - f"{mod_name} exists in {nx_path}, but not in {cnx_path}" + f"{mod_name} exists in {nx_path}, but not in {nxcg_path}" ) diff --git a/python/cugraph-nx/cugraph_nx/typing.py b/python/nx-cugraph/nx_cugraph/typing.py similarity index 100% rename from python/cugraph-nx/cugraph_nx/typing.py rename to python/nx-cugraph/nx_cugraph/typing.py diff --git a/python/cugraph-nx/cugraph_nx/utils/__init__.py b/python/nx-cugraph/nx_cugraph/utils/__init__.py similarity index 100% rename from python/cugraph-nx/cugraph_nx/utils/__init__.py rename to python/nx-cugraph/nx_cugraph/utils/__init__.py diff --git a/python/cugraph-nx/cugraph_nx/utils/decorators.py b/python/nx-cugraph/nx_cugraph/utils/decorators.py similarity index 97% rename from python/cugraph-nx/cugraph_nx/utils/decorators.py rename to python/nx-cugraph/nx_cugraph/utils/decorators.py index 619c9610c5d..3dbdb07e87f 100644 --- 
a/python/cugraph-nx/cugraph_nx/utils/decorators.py +++ b/python/nx-cugraph/nx_cugraph/utils/decorators.py @@ -14,7 +14,7 @@ from networkx.utils.decorators import not_implemented_for -from cugraph_nx.interface import BackendInterface +from nx_cugraph.interface import BackendInterface __all__ = ["not_implemented_for", "networkx_algorithm"] diff --git a/python/cugraph-nx/cugraph_nx/utils/misc.py b/python/nx-cugraph/nx_cugraph/utils/misc.py similarity index 78% rename from python/cugraph-nx/cugraph_nx/utils/misc.py rename to python/nx-cugraph/nx_cugraph/utils/misc.py index 18487a05996..64c0be066f2 100644 --- a/python/cugraph-nx/cugraph_nx/utils/misc.py +++ b/python/nx-cugraph/nx_cugraph/utils/misc.py @@ -10,9 +10,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +import operator as op +import sys +from random import Random + import cupy as cp -__all__ = ["_groupby"] +__all__ = ["_groupby", "_handle_seed"] def _groupby(groups: cp.ndarray, values: cp.ndarray) -> dict[int, cp.ndarray]: @@ -43,3 +49,12 @@ def _groupby(groups: cp.ndarray, values: cp.ndarray) -> dict[int, cp.ndarray]: rv[i] = sorted_values[start:end] start = end return rv + + +def _handle_seed(seed: int | Random | None) -> int: + """Turn seed into the int pylibcugraph needs; a seed of None is returned as None.""" + if seed is None: + return + if isinstance(seed, Random): + return seed.randint(0, sys.maxsize) + return op.index(seed) # Ensure seed is integral diff --git a/python/cugraph-nx/pyproject.toml b/python/nx-cugraph/pyproject.toml similarity index 96% rename from python/cugraph-nx/pyproject.toml rename to python/nx-cugraph/pyproject.toml index 1882bed251f..95e9c256e5d 100644 --- a/python/cugraph-nx/pyproject.toml +++ b/python/nx-cugraph/pyproject.toml @@ -9,7 +9,7 @@ requires = [ build-backend = "setuptools.build_meta" [project] -name = 
"cugraph-nx" +name = "nx-cugraph" version = "23.10.00" description = "cugraph backend for NetworkX" readme = { file = "README.md", content-type = "text/markdown" } @@ -52,15 +52,15 @@ Homepage = "https://github.com/rapidsai/cugraph" Documentation = "https://docs.rapids.ai/api/cugraph/stable/" [project.entry-points."networkx.plugins"] -cugraph = "cugraph_nx.interface:BackendInterface" +cugraph = "nx_cugraph.interface:BackendInterface" [tool.setuptools] license-files = ["LICENSE"] [tool.setuptools.packages.find] include = [ - "cugraph_nx*", - "cugraph_nx.*", + "nx_cugraph*", + "nx_cugraph.*", ] [tool.black] @@ -73,12 +73,12 @@ profile = "black" skip_gitignore = true float_to_top = true default_section = "THIRDPARTY" -known_first_party = "cugraph_nx" +known_first_party = "nx_cugraph" line_length = 88 [tool.pytest.ini_options] minversion = "6.0" -testpaths = "cugraph_nx/tests" +testpaths = "nx_cugraph/tests" xfail_strict = true markers = [ "slow: Skipped unless --runslow passed", @@ -109,7 +109,7 @@ addopts = [ [tool.coverage.run] branch = true -source = ["cugraph_nx"] +source = ["nx_cugraph"] omit = [] [tool.coverage.report] @@ -202,7 +202,7 @@ ignore = [ [tool.ruff.per-file-ignores] "__init__.py" = ["F401"] # Allow unused imports (w/o defining `__all__`) # Allow assert, print, RNG, and no docstring -"cugraph_nx/**/tests/*py" = ["S101", "S311", "T201", "D103", "D100"] +"nx_cugraph/**/tests/*py" = ["S101", "S311", "T201", "D103", "D100"] [tool.ruff.flake8-annotations] mypy-init-return = true diff --git a/python/cugraph-nx/run_nx_tests.sh b/python/nx-cugraph/run_nx_tests.sh similarity index 79% rename from python/cugraph-nx/run_nx_tests.sh rename to python/nx-cugraph/run_nx_tests.sh index 7ea2348eaff..07c97cdf947 100755 --- a/python/cugraph-nx/run_nx_tests.sh +++ b/python/nx-cugraph/run_nx_tests.sh @@ -4,21 +4,21 @@ # # NETWORKX_GRAPH_CONVERT=cugraph # Used by networkx versions 3.0 and 3.1 -# Must be set to "cugraph" to test the cugraph-nx backend. 
+# Must be set to "cugraph" to test the nx-cugraph backend. # # NETWORKX_TEST_BACKEND=cugraph # Replaces NETWORKX_GRAPH_CONVERT for networkx versions >=3.2 -# Must be set to "cugraph" to test the cugraph-nx backend. +# Must be set to "cugraph" to test the nx-cugraph backend. # # NETWORKX_FALLBACK_TO_NX=True (optional) # Used by networkx versions >=3.2. With this set, input graphs will not be -# converted to cugraph-nx and the networkx algorithm will be called for +# converted to nx-cugraph and the networkx algorithm will be called for # algorithms that we don't implement or if we raise NotImplementedError. # This is sometimes helpful to get increased testing and coverage, but # testing takes longer. Without it, tests will xfail when encountering a # function that we don't implement. # -# Coverage of `cugraph_nx.algorithms` is reported and is a good sanity check +# Coverage of `nx_cugraph.algorithms` is reported and is a good sanity check # that algorithms run. # Warning: cugraph has a .coveragerc file in the /python directory, @@ -30,7 +30,7 @@ NETWORKX_TEST_BACKEND=cugraph \ NETWORKX_FALLBACK_TO_NX=True \ pytest \ --pyargs networkx \ - --cov=cugraph_nx.algorithms \ + --cov=nx_cugraph.algorithms \ --cov-report term-missing \ --no-cov-on-fail \ "$@" diff --git a/python/cugraph-nx/setup.py b/python/nx-cugraph/setup.py similarity index 100% rename from python/cugraph-nx/setup.py rename to python/nx-cugraph/setup.py diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd index 67ba43bf611..64944e8773f 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd @@ -98,6 +98,7 @@ cdef extern from "cugraph_c/community_algorithms.h": const cugraph_resource_handle_t* handle, cugraph_graph_t* graph, size_t max_level, + double threshold, double resolution, bool_t do_expensive_check, 
cugraph_hierarchical_clustering_result_t** result, diff --git a/python/pylibcugraph/pylibcugraph/louvain.pyx b/python/pylibcugraph/pylibcugraph/louvain.pyx index ecae7e700b4..eca569d7da1 100644 --- a/python/pylibcugraph/pylibcugraph/louvain.pyx +++ b/python/pylibcugraph/pylibcugraph/louvain.pyx @@ -51,7 +51,8 @@ from pylibcugraph.utils cimport ( def louvain(ResourceHandle resource_handle, _GPUGraph graph, size_t max_level, - double resolution, + float threshold, + float resolution, bool_t do_expensive_check): """ Compute the modularity optimizing partition of the input graph using the @@ -72,11 +73,16 @@ def louvain(ResourceHandle resource_handle, than the specified number of iterations. No error occurs when the algorithm terminates early in this manner. - resolution: double + threshold: float + Modularity gain threshold for each level. If the gain of + modularity between 2 levels of the algorithm is less than the + given threshold then the algorithm stops and returns the + resulting communities. + + resolution: float Called gamma in the modularity formula, this changes the size of the communities. Higher resolutions lead to more smaller communities, lower resolutions lead to fewer larger communities. - Defaults to 1. do_expensive_check : bool_t If True, performs more extensive tests on the inputs to ensure @@ -100,7 +106,7 @@ def louvain(ResourceHandle resource_handle, ... resource_handle, graph_props, srcs, dsts, weights, ... 
store_transposed=True, renumber=False, do_expensive_check=False) >>> (vertices, clusters, modularity) = pylibcugraph.louvain( - resource_handle, G, 100, 1., False) + resource_handle, G, 100, 1e-7, 1., False) >>> vertices [0, 1, 2] >>> clusters @@ -119,6 +125,7 @@ def louvain(ResourceHandle resource_handle, error_code = cugraph_louvain(c_resource_handle_ptr, c_graph_ptr, max_level, + threshold, resolution, do_expensive_check, &result_ptr, diff --git a/python/pylibcugraph/pylibcugraph/tests/test_louvain.py b/python/pylibcugraph/pylibcugraph/tests/test_louvain.py index d2027a46d9a..adea5e01f15 100644 --- a/python/pylibcugraph/pylibcugraph/tests/test_louvain.py +++ b/python/pylibcugraph/pylibcugraph/tests/test_louvain.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -77,6 +77,7 @@ def test_sg_louvain_cupy(): ) max_level = 100 + threshold = 0.0001 resolution = 1.0 sg = SGGraph( @@ -91,7 +92,7 @@ def test_sg_louvain_cupy(): ) vertices, clusters, modularity = louvain( - resource_handle, sg, max_level, resolution, do_expensive_check=False + resource_handle, sg, max_level, threshold, resolution, do_expensive_check=False ) check_results(vertices, clusters, modularity) @@ -130,6 +131,7 @@ def test_sg_louvain_cudf(): ) max_level = 100 + threshold = 0.0001 resolution = 1.0 sg = SGGraph( @@ -144,7 +146,7 @@ def test_sg_louvain_cudf(): ) vertices, clusters, modularity = louvain( - resource_handle, sg, max_level, resolution, do_expensive_check=False + resource_handle, sg, max_level, threshold, resolution, do_expensive_check=False ) check_results(vertices, clusters, modularity)