From 6efb1c628973743c01945ffb724be2271eaede1e Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Tue, 27 Feb 2024 13:53:26 -0600 Subject: [PATCH] nx-cugraph: automatically generate trees in README.md (#4156) This updates how we create trees. Also, CI now tests that auto-generated files are up-to-date (not updating these has gotten me a couple of times). Authors: - Erik Welch (https://github.com/eriknw) Approvers: - Rick Ratzel (https://github.com/rlratzel) - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/cugraph/pull/4156 --- .gitignore | 2 + .pre-commit-config.yaml | 3 +- ci/test_python.sh | 7 + python/nx-cugraph/Makefile | 13 +- python/nx-cugraph/README.md | 270 +++++++++--------- python/nx-cugraph/lint.yaml | 12 +- .../algorithms/link_analysis/hits_alg.py | 6 +- .../algorithms/link_analysis/pagerank_alg.py | 6 +- python/nx-cugraph/nx_cugraph/classes/graph.py | 3 +- .../nx_cugraph/classes/multigraph.py | 4 +- python/nx-cugraph/nx_cugraph/interface.py | 6 +- .../nx_cugraph/scripts/print_table.py | 3 +- .../nx_cugraph/scripts/print_tree.py | 122 +++++--- python/nx-cugraph/pyproject.toml | 13 +- python/nx-cugraph/scripts/update_readme.py | 203 +++++++++++++ 15 files changed, 460 insertions(+), 213 deletions(-) create mode 100644 python/nx-cugraph/scripts/update_readme.py diff --git a/.gitignore b/.gitignore index 358650cfc5a..2fea1022910 100644 --- a/.gitignore +++ b/.gitignore @@ -78,6 +78,8 @@ datasets/* !datasets/karate-disjoint.csv !datasets/netscience.csv +# nx-cugraph side effects +python/nx-cugraph/objects.inv .pydevproject diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 188ea1a266a..6b7ff14417c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,8 +15,9 @@ repos: hooks: - id: black language_version: python3 - args: [--target-version=py38] + args: [--target-version=py39] files: ^(python/.*|benchmarks/.*)$ + exclude: ^python/nx-cugraph/ - repo: https://github.com/PyCQA/flake8 rev: 6.0.0 hooks: diff --git a/ci/test_python.sh b/ci/test_python.sh index 8fa9a90ae69..9fa1de2e5e7 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -127,6 +127,13 @@ python -m nx_cugraph.scripts.print_tree --dispatch-name --plc --incomplete --dif python -m nx_cugraph.scripts.print_table popd +rapids-logger "ensure nx-cugraph autogenerated files are up to date" +pushd python/nx-cugraph +make || true +git diff --exit-code . +git checkout . +popd + rapids-logger "pytest cugraph-service (single GPU)" ./ci/run_cugraph_service_pytests.sh \ --verbose \ diff --git a/python/nx-cugraph/Makefile b/python/nx-cugraph/Makefile index 6e1b98ee6e9..6500d834ee7 100644 --- a/python/nx-cugraph/Makefile +++ b/python/nx-cugraph/Makefile @@ -1,12 +1,12 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
SHELL= /bin/bash .PHONY: all -all: plugin-info lint +all: plugin-info lint readme .PHONY: lint lint: - git ls-files | xargs pre-commit run --config lint.yaml --files + git ls-files | xargs pre-commit run --config lint.yaml --files || true .PHONY: lint-update lint-update: @@ -15,3 +15,10 @@ lint-update: .PHONY: plugin-info plugin-info: python _nx_cugraph/__init__.py + +objects.inv: + wget https://networkx.org/documentation/stable/objects.inv + +.PHONY: readme +readme: objects.inv + python scripts/update_readme.py README.md objects.inv diff --git a/python/nx-cugraph/README.md b/python/nx-cugraph/README.md index 5d0554734a8..8201dc34eb2 100644 --- a/python/nx-cugraph/README.md +++ b/python/nx-cugraph/README.md @@ -91,144 +91,144 @@ familiar and easy-to-use API. Below is the list of algorithms that are currently supported in nx-cugraph. -### Algorithms - -``` -bipartite - ├─ basic - │ └─ is_bipartite - └─ generators - └─ complete_bipartite_graph -centrality - ├─ betweenness - │ ├─ betweenness_centrality - │ └─ edge_betweenness_centrality - ├─ degree_alg - │ ├─ degree_centrality - │ ├─ in_degree_centrality - │ └─ out_degree_centrality - ├─ eigenvector - │ └─ eigenvector_centrality - └─ katz - └─ katz_centrality -cluster - ├─ average_clustering - ├─ clustering - ├─ transitivity - └─ triangles -community - └─ louvain - └─ louvain_communities -components - ├─ connected - │ ├─ connected_components - │ ├─ is_connected - │ ├─ node_connected_component - │ └─ number_connected_components - └─ weakly_connected - ├─ is_weakly_connected - ├─ number_weakly_connected_components - └─ weakly_connected_components -core - ├─ core_number - └─ k_truss -dag - ├─ ancestors - └─ descendants -isolate - ├─ is_isolate - ├─ isolates - └─ number_of_isolates -link_analysis - ├─ hits_alg - │ └─ hits - └─ pagerank_alg - └─ pagerank -operators - └─ unary - ├─ complement - └─ reverse -reciprocity - ├─ overall_reciprocity - └─ reciprocity -shortest_paths - └─ unweighted - ├─ single_source_shortest_path_length - └─ single_target_shortest_path_length -traversal - └─ breadth_first_search - ├─ bfs_edges - ├─ bfs_layers - ├─ bfs_predecessors - ├─ bfs_successors - ├─ bfs_tree - ├─ descendants_at_distance - └─ generic_bfs_edges -tree - └─ recognition - ├─ is_arborescence - ├─ is_branching - ├─ is_forest - └─ is_tree -``` - -### Generators - -``` -classic - ├─ barbell_graph - ├─ circular_ladder_graph - ├─ complete_graph - ├─ complete_multipartite_graph - ├─ cycle_graph - ├─ empty_graph - ├─ ladder_graph - ├─ lollipop_graph - ├─ null_graph - ├─ path_graph - ├─ star_graph - ├─ tadpole_graph - ├─ trivial_graph - ├─ turan_graph - └─ wheel_graph -community - └─ caveman_graph -small - ├─ bull_graph - ├─ chvatal_graph - ├─ cubical_graph - ├─ desargues_graph - ├─ diamond_graph - ├─ dodecahedral_graph - ├─ frucht_graph - ├─ heawood_graph - ├─ house_graph - ├─ house_x_graph - ├─ icosahedral_graph - ├─ krackhardt_kite_graph - ├─ moebius_kantor_graph - ├─ octahedral_graph - ├─ pappus_graph - ├─ petersen_graph - ├─ sedgewick_maze_graph - ├─ tetrahedral_graph - ├─ truncated_cube_graph - ├─ truncated_tetrahedron_graph - └─ tutte_graph -social - ├─ davis_southern_women_graph - ├─ florentine_families_graph - ├─ karate_club_graph - └─ les_miserables_graph -``` +### [Algorithms](https://networkx.org/documentation/latest/reference/algorithms/index.html) + +
+bipartite
+ ├─ basic
+ │   └─ is_bipartite
+ └─ generators
+     └─ complete_bipartite_graph
+centrality
+ ├─ betweenness
+ │   ├─ betweenness_centrality
+ │   └─ edge_betweenness_centrality
+ ├─ degree_alg
+ │   ├─ degree_centrality
+ │   ├─ in_degree_centrality
+ │   └─ out_degree_centrality
+ ├─ eigenvector
+ │   └─ eigenvector_centrality
+ └─ katz
+     └─ katz_centrality
+cluster
+ ├─ average_clustering
+ ├─ clustering
+ ├─ transitivity
+ └─ triangles
+community
+ └─ louvain
+     └─ louvain_communities
+components
+ ├─ connected
+ │   ├─ connected_components
+ │   ├─ is_connected
+ │   ├─ node_connected_component
+ │   └─ number_connected_components
+ └─ weakly_connected
+     ├─ is_weakly_connected
+     ├─ number_weakly_connected_components
+     └─ weakly_connected_components
+core
+ ├─ core_number
+ └─ k_truss
+dag
+ ├─ ancestors
+ └─ descendants
+isolate
+ ├─ is_isolate
+ ├─ isolates
+ └─ number_of_isolates
+link_analysis
+ ├─ hits_alg
+ │   └─ hits
+ └─ pagerank_alg
+     └─ pagerank
+operators
+ └─ unary
+     ├─ complement
+     └─ reverse
+reciprocity
+ ├─ overall_reciprocity
+ └─ reciprocity
+shortest_paths
+ └─ unweighted
+     ├─ single_source_shortest_path_length
+     └─ single_target_shortest_path_length
+traversal
+ └─ breadth_first_search
+     ├─ bfs_edges
+     ├─ bfs_layers
+     ├─ bfs_predecessors
+     ├─ bfs_successors
+     ├─ bfs_tree
+     ├─ descendants_at_distance
+     └─ generic_bfs_edges
+tree
+ └─ recognition
+     ├─ is_arborescence
+     ├─ is_branching
+     ├─ is_forest
+     └─ is_tree
+</pre>
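The trees in this README are now generated rather than hand-maintained: `scripts/update_readme.py` (added later in this patch) nests each dotted NetworkX path into a dict-of-dicts and renders it with the box-drawing helpers reworked in `print_tree.py`. Below is a minimal sketch of that idea on plain strings; `nest_paths` and `render_tree` are illustrative stand-ins for the script's `assoc_in` and `tree_lines`, not the script itself.

```python
# Illustrative sketch only: nest dotted paths into a dict-of-dicts, then walk
# it emitting the same box-drawing layout used by the trees in this README.
def nest_paths(paths):
    tree = {}
    for path in paths:
        node = tree
        for part in path.split("."):
            node = node.setdefault(part, {})
    return tree


def render_tree(tree, prefix=""):
    items = list(tree.items())
    for i, (name, children) in enumerate(items):
        closing = i == len(items) - 1  # the last child closes this level
        yield f"{prefix} {'└' if closing else '├'}─ {name}"
        # A closed branch pads with spaces; an open branch keeps the "│" rail.
        yield from render_tree(children, prefix + ("    " if closing else " │  "))


tree = nest_paths(["cluster.triangles", "cluster.transitivity", "dag.ancestors"])
for module, children in tree.items():
    print(module)  # top-level entries are printed bare, as in the trees above
    print("\n".join(render_tree(children)))
```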
+ +### [Generators](https://networkx.org/documentation/latest/reference/generators.html) + +
+classic
+ ├─ barbell_graph
+ ├─ circular_ladder_graph
+ ├─ complete_graph
+ ├─ complete_multipartite_graph
+ ├─ cycle_graph
+ ├─ empty_graph
+ ├─ ladder_graph
+ ├─ lollipop_graph
+ ├─ null_graph
+ ├─ path_graph
+ ├─ star_graph
+ ├─ tadpole_graph
+ ├─ trivial_graph
+ ├─ turan_graph
+ └─ wheel_graph
+community
+ └─ caveman_graph
+small
+ ├─ bull_graph
+ ├─ chvatal_graph
+ ├─ cubical_graph
+ ├─ desargues_graph
+ ├─ diamond_graph
+ ├─ dodecahedral_graph
+ ├─ frucht_graph
+ ├─ heawood_graph
+ ├─ house_graph
+ ├─ house_x_graph
+ ├─ icosahedral_graph
+ ├─ krackhardt_kite_graph
+ ├─ moebius_kantor_graph
+ ├─ octahedral_graph
+ ├─ pappus_graph
+ ├─ petersen_graph
+ ├─ sedgewick_maze_graph
+ ├─ tetrahedral_graph
+ ├─ truncated_cube_graph
+ ├─ truncated_tetrahedron_graph
+ └─ tutte_graph
+social
+ ├─ davis_southern_women_graph
+ ├─ florentine_families_graph
+ ├─ karate_club_graph
+ └─ les_miserables_graph
+</pre>
### Other -``` -convert_matrix - ├─ from_pandas_edgelist - └─ from_scipy_sparse_array -``` +
+convert_matrix
+ ├─ from_pandas_edgelist
+ └─ from_scipy_sparse_array
+</pre>
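Each `<pre>` block above is spliced back into README.md in place: `scripts/update_readme.py` locates a block with a named regex group and replaces only its body, which is what lets CI rerun `make` and then check `git diff --exit-code`. A rough sketch of that splice on a toy string, using a simplified regex; the real script goes through its `replace_body` helper and writes linked entries.

```python
import re

# Toy README fragment standing in for the real file.
readme = """### [Algorithms](https://networkx.org/...)

<pre>
old body
</pre>
"""

# Find the text between <pre> and </pre> as a named group, then splice in the
# freshly rendered tree; update_readme.py does this via match.span("body").
match = re.search(
    r"### .Algorithms(?P<preamble>.*?)<pre>\n(?P<body>.*?)\n</pre>",
    readme,
    re.DOTALL,
)
start, stop = match.span("body")
print(readme[:start] + "bipartite\n └─ basic" + readme[stop:])
```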
To request nx-cugraph backend support for a NetworkX API that is not listed above, visit the [cuGraph GitHub repo](https://github.com/rapidsai/cugraph). diff --git a/python/nx-cugraph/lint.yaml b/python/nx-cugraph/lint.yaml index 8e87fc23592..fdd24861da7 100644 --- a/python/nx-cugraph/lint.yaml +++ b/python/nx-cugraph/lint.yaml @@ -31,7 +31,7 @@ repos: - id: validate-pyproject name: Validate pyproject.toml - repo: https://github.com/PyCQA/autoflake - rev: v2.2.1 + rev: v2.3.0 hooks: - id: autoflake args: [--in-place] @@ -40,17 +40,17 @@ repos: hooks: - id: isort - repo: https://github.com/asottile/pyupgrade - rev: v3.15.0 + rev: v3.15.1 hooks: - id: pyupgrade args: [--py39-plus] - repo: https://github.com/psf/black - rev: 23.12.1 + rev: 24.2.0 hooks: - id: black # - id: black-jupyter - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.14 + rev: v0.2.2 hooks: - id: ruff args: [--fix-only, --show-fixes] # --unsafe-fixes] @@ -62,7 +62,7 @@ repos: additional_dependencies: &flake8_dependencies # These versions need updated manually - flake8==7.0.0 - - flake8-bugbear==24.1.17 + - flake8-bugbear==24.2.6 - flake8-simplify==0.21.0 - repo: https://github.com/asottile/yesqa rev: v1.5.0 @@ -77,7 +77,7 @@ repos: additional_dependencies: [tomli] files: ^(nx_cugraph|docs)/ - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.14 + rev: v0.2.2 hooks: - id: ruff - repo: https://github.com/pre-commit/pre-commit-hooks diff --git a/python/nx-cugraph/nx_cugraph/algorithms/link_analysis/hits_alg.py b/python/nx-cugraph/nx_cugraph/algorithms/link_analysis/hits_alg.py index 9e723624a3b..e61a931c069 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/link_analysis/hits_alg.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/link_analysis/hits_alg.py @@ -66,9 +66,9 @@ def hits( resource_handle=plc.ResourceHandle(), graph=G._get_plc_graph(weight, 1, dtype, store_transposed=True), tol=tol, - initial_hubs_guess_vertices=None - if nstart is None - else cp.arange(N, dtype=index_dtype), + initial_hubs_guess_vertices=( + None if nstart is None else cp.arange(N, dtype=index_dtype) + ), initial_hubs_guess_values=nstart, max_iter=max_iter, normalized=normalized, diff --git a/python/nx-cugraph/nx_cugraph/algorithms/link_analysis/pagerank_alg.py b/python/nx-cugraph/nx_cugraph/algorithms/link_analysis/pagerank_alg.py index 55fcc3e520a..40224e91d57 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/link_analysis/pagerank_alg.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/link_analysis/pagerank_alg.py @@ -78,9 +78,9 @@ def pagerank( "graph": G._get_plc_graph(weight, 1, dtype, store_transposed=True), "precomputed_vertex_out_weight_vertices": None, "precomputed_vertex_out_weight_sums": None, - "initial_guess_vertices": None - if nstart is None - else cp.arange(N, dtype=index_dtype), + "initial_guess_vertices": ( + None if nstart is None else cp.arange(N, dtype=index_dtype) + ), "initial_guess_values": nstart, "alpha": alpha, "epsilon": N * tol, diff --git a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py index 0951ee6b135..5132e6a547b 100644 --- a/python/nx-cugraph/nx_cugraph/classes/graph.py +++ b/python/nx-cugraph/nx_cugraph/classes/graph.py @@ -634,8 +634,7 @@ def _get_plc_graph( "pylibcugraph only supports float16 and float32 dtypes." 
) elif ( - edge_array.dtype == np.uint64 - and edge_array.max().tolist() > 2**53 + edge_array.dtype == np.uint64 and edge_array.max().tolist() > 2**53 ): raise ValueError( f"Integer value of value is too large (> 2**53): {val}; " diff --git a/python/nx-cugraph/nx_cugraph/classes/multigraph.py b/python/nx-cugraph/nx_cugraph/classes/multigraph.py index fb787369e58..de58474de70 100644 --- a/python/nx-cugraph/nx_cugraph/classes/multigraph.py +++ b/python/nx-cugraph/nx_cugraph/classes/multigraph.py @@ -360,9 +360,7 @@ def get_edge_data( if k not in self.edge_masks or self.edge_masks[k][index] } return { - edge_keys[index] - if edge_keys is not None - else index: { + edge_keys[index] if edge_keys is not None else index: { k: v[index].tolist() for k, v in self.edge_values.items() if k not in self.edge_masks or self.edge_masks[k][index] diff --git a/python/nx-cugraph/nx_cugraph/interface.py b/python/nx-cugraph/nx_cugraph/interface.py index 3c62fc3628e..d044ba6960d 100644 --- a/python/nx-cugraph/nx_cugraph/interface.py +++ b/python/nx-cugraph/nx_cugraph/interface.py @@ -220,9 +220,9 @@ def key(testpath): ) if sys.version_info[:2] == (3, 9): # This test is sensitive to RNG, which depends on Python version - xfail[ - key("test_louvain.py:test_threshold") - ] = "Louvain does not support seed parameter" + xfail[key("test_louvain.py:test_threshold")] = ( + "Louvain does not support seed parameter" + ) if nxver.major == 3 and nxver.minor >= 2: xfail.update( { diff --git a/python/nx-cugraph/nx_cugraph/scripts/print_table.py b/python/nx-cugraph/nx_cugraph/scripts/print_table.py index 117a1444f48..7c90281247c 100755 --- a/python/nx-cugraph/nx_cugraph/scripts/print_table.py +++ b/python/nx-cugraph/nx_cugraph/scripts/print_table.py @@ -61,7 +61,8 @@ def main(path_to_info=None, *, file=sys.stdout): lines = ["networkx_path,dispatch_name,version_added,plc,is_incomplete,is_different"] lines.extend(",".join(map(str, info)) for info in path_to_info.values()) text = "\n".join(lines) - print(text, file=file) + if file is not None: + print(text, file=file) return text diff --git a/python/nx-cugraph/nx_cugraph/scripts/print_tree.py b/python/nx-cugraph/nx_cugraph/scripts/print_tree.py index 485873a447d..fbb1c3dd0c5 100755 --- a/python/nx-cugraph/nx_cugraph/scripts/print_tree.py +++ b/python/nx-cugraph/nx_cugraph/scripts/print_tree.py @@ -12,29 +12,58 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import argparse -import re import sys -import networkx as nx +from nx_cugraph.scripts.print_table import Info, get_path_to_info -from nx_cugraph.scripts.print_table import get_path_to_info +def assoc_in(d, keys, value): + """Like Clojure's assoc-in, but modifies d in-place.""" + inner = d + keys = iter(keys) + key = next(keys) + for next_key in keys: + if key not in inner: + inner[key] = {} + inner = inner[key] + key = next_key + inner[key] = value + return d -def add_branch(G, path, extra="", *, skip=0): - branch = path.split(".") - prev = ".".join(branch[: skip + 1]) - for i in range(skip + 2, len(branch)): - cur = ".".join(branch[:i]) - G.add_edge(prev, cur) - prev = cur - if extra: - if not isinstance(extra, str): - extra = ", ".join(extra) - path += f" ({extra})" - G.add_edge(prev, path) + +def default_get_payload_internal(keys): + return keys[-1] + + +def tree_lines( + tree, + parents=(), + are_levels_closing=(), + get_payload_internal=default_get_payload_internal, +): + pre = "".join( + " " if is_level_closing else " │ " + for is_level_closing in are_levels_closing + ) + c = "├" + are_levels_closing += (False,) + for i, (key, val) in enumerate(tree.items(), 1): + if i == len(tree): # Last item + c = "└" + are_levels_closing = are_levels_closing[:-1] + (True,) + if isinstance(val, str): + yield pre + f" {c}─ " + val + else: + yield pre + f" {c}─ " + get_payload_internal((*parents, key)) + yield from tree_lines( + val, + (*parents, key), + are_levels_closing, + get_payload_internal=get_payload_internal, + ) -def get_extra( +def get_payload( info, *, networkx_path=False, @@ -64,7 +93,10 @@ def get_extra( extra.append("is-incomplete") if different and info.is_different: extra.append("is-different") - return extra + extra = ", ".join(extra) + if extra: + extra = f" ({extra})" + return info.networkx_path.rsplit(".", 1)[-1] + extra def create_tree( @@ -80,12 +112,20 @@ def create_tree( incomplete=False, different=False, prefix="", + strip_networkx=True, + get_payload=get_payload, ): if path_to_info is None: path_to_info = get_path_to_info() + if strip_networkx: + path_to_info = { + key: Info(info.networkx_path.replace("networkx.", "", 1), *info[1:]) + for key, info in path_to_info.items() + } if isinstance(by, str): by = [by] - G = nx.DiGraph() + # We rely on the fact that dicts maintain order + tree = {} for info in sorted( path_to_info.values(), key=lambda x: (*(getattr(x, b) for b in by), x.networkx_path), @@ -93,7 +133,7 @@ def create_tree( if not all(getattr(info, b) for b in by): continue path = prefix + ".".join(getattr(info, b) for b in by) - extra = get_extra( + payload = get_payload( info, networkx_path=networkx_path, dispatch_name=dispatch_name, @@ -103,8 +143,8 @@ def create_tree( incomplete=incomplete, different=different, ) - add_branch(G, path, extra=extra, skip=skip) - return G + assoc_in(tree, path.split("."), payload) + return tree def main( @@ -132,45 +172,33 @@ def main( "different": different, } if by == "networkx_path": - G = create_tree(path_to_info, by="networkx_path", **kwargs) - text = re.sub( - r" [A-Za-z_\./]+\.", " ", ("\n".join(nx.generate_network_text(G))) - ) + tree = create_tree(path_to_info, by="networkx_path", **kwargs) + text = "\n".join(tree_lines(tree)) elif by == "plc": - G = create_tree( - path_to_info, by=["plc", "networkx_path"], prefix="plc-", **kwargs - ) - text = re.sub( - "plc-", - "plc.", - re.sub( - r" plc-[A-Za-z_\./]*\.", - " ", - "\n".join(nx.generate_network_text(G)), - ), + tree = create_tree( + path_to_info, + by=["plc", 
"networkx_path"], + prefix="plc-", + **kwargs, ) + text = "\n".join(tree_lines(tree)).replace("plc-", "plc.") elif by == "version_added": - G = create_tree( + tree = create_tree( path_to_info, by=["version_added", "networkx_path"], prefix="version_added-", **kwargs, ) - text = re.sub( - "version_added-", - "version: ", - re.sub( - r" version_added-[-0-9A-Za-z_\./]*\.", - " ", - "\n".join(nx.generate_network_text(G)), - ), - ).replace("-", ".") + text = "\n".join(tree_lines(tree)).replace("version_added-", "version: ") + for digit in "0123456789": + text = text.replace(f"2{digit}-", f"2{digit}.") else: raise ValueError( "`by` argument should be one of {'networkx_path', 'plc', 'version_added' " f"got: {by}" ) - print(text, file=file) + if file is not None: + print(text, file=file) return text diff --git a/python/nx-cugraph/pyproject.toml b/python/nx-cugraph/pyproject.toml index a7525530ac8..60a4b5589d4 100644 --- a/python/nx-cugraph/pyproject.toml +++ b/python/nx-cugraph/pyproject.toml @@ -149,8 +149,10 @@ exclude_lines = [ # https://github.com/charliermarsh/ruff/ line-length = 88 target-version = "py39" +[tool.ruff.lint] unfixable = [ "F841", # unused-variable (Note: can leave useless expression) + "B905", # zip-without-explicit-strict (Note: prefer `zip(x, y, strict=True)`) ] select = [ "ALL", @@ -172,7 +174,6 @@ ignore = [ # "SIM401", # Use dict.get ... instead of if-else-block (Note: if-else better for coverage and sometimes clearer) # "TRY004", # Prefer `TypeError` exception for invalid type (Note: good advice, but not worth the nuisance) "B904", # Bare `raise` inside exception clause (like TRY200; sometimes okay) - "TRY200", # Use `raise from` to specify exception cause (Note: sometimes okay to raise original exception) # Intentionally ignored "A003", # Class attribute ... is shadowing a python builtin @@ -224,22 +225,22 @@ ignore = [ "PD", # pandas-vet (Intended for scripts that use pandas, not libraries) ] -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "__init__.py" = ["F401"] # Allow unused imports (w/o defining `__all__`) # Allow assert, print, RNG, and no docstring "nx_cugraph/**/tests/*py" = ["S101", "S311", "T201", "D103", "D100"] "_nx_cugraph/__init__.py" = ["E501"] "nx_cugraph/algorithms/**/*py" = ["D205", "D401"] # Allow flexible docstrings for algorithms -[tool.ruff.flake8-annotations] +[tool.ruff.lint.flake8-annotations] mypy-init-return = true -[tool.ruff.flake8-builtins] +[tool.ruff.lint.flake8-builtins] builtins-ignorelist = ["copyright"] -[tool.ruff.flake8-pytest-style] +[tool.ruff.lint.flake8-pytest-style] fixture-parentheses = false mark-parentheses = false -[tool.ruff.pydocstyle] +[tool.ruff.lint.pydocstyle] convention = "numpy" diff --git a/python/nx-cugraph/scripts/update_readme.py b/python/nx-cugraph/scripts/update_readme.py new file mode 100644 index 00000000000..1ab5a76c4c0 --- /dev/null +++ b/python/nx-cugraph/scripts/update_readme.py @@ -0,0 +1,203 @@ +#!/usr/bin/env python +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import re +import zlib +from collections import namedtuple +from pathlib import Path +from warnings import warn + +from nx_cugraph.scripts.print_tree import create_tree, tree_lines + +# See: https://sphobjinv.readthedocs.io/en/stable/syntax.html +DocObject = namedtuple( + "DocObject", + "name, domain, role, priority, uri, displayname", +) + + +def parse_docobject(line): + left, right = line.split(":") + name, domain = left.rsplit(" ", 1) + role, priority, uri, displayname = right.split(" ", 3) + if displayname == "-": + displayname = name + if uri.endswith("$"): + uri = uri[:-1] + name + return DocObject(name, domain, role, priority, uri, displayname) + + +def replace_body(text, match, new_body): + start, stop = match.span("body") + return text[:start] + new_body + text[stop:] + + +# NetworkX isn't perfectly intersphinx-compatible, so manually specify some urls. +# See: https://github.com/networkx/networkx/issues/7278 +MANUAL_OBJECT_URLS = { + "networkx.algorithms.centrality.betweenness": ( + "https://networkx.org/documentation/stable/reference/" + "algorithms/centrality.html#shortest-path-betweenness" + ), + "networkx.algorithms.centrality.degree_alg": ( + "https://networkx.org/documentation/stable/reference/" + "algorithms/centrality.html#degree" + ), + "networkx.algorithms.centrality.eigenvector": ( + "https://networkx.org/documentation/stable/reference/" + "algorithms/centrality.html#eigenvector" + ), + "networkx.algorithms.centrality.katz": ( + "https://networkx.org/documentation/stable/reference/" + "algorithms/centrality.html#eigenvector" + ), + "networkx.algorithms.components.connected": ( + "https://networkx.org/documentation/stable/reference/" + "algorithms/component.html#connectivity" + ), + "networkx.algorithms.components.weakly_connected": ( + "https://networkx.org/documentation/stable/reference/" + "algorithms/component.html#weak-connectivity" + ), +} + + +def main(readme_file, objects_filename): + """``readme_file`` must be readable and writable, so use mode ``"a+"``""" + # Use the `objects.inv` file to determine URLs. For details about this file, see: + # https://sphobjinv.readthedocs.io/en/stable/syntax.html + # We might be better off using a library like that, but roll our own for now. + with Path(objects_filename).open("rb") as objects_file: + line = objects_file.readline() + if line != b"# Sphinx inventory version 2\n": + raise RuntimeError(f"Bad line in objects.inv:\n\n{line}") + line = objects_file.readline() + if line != b"# Project: NetworkX\n": + raise RuntimeError(f"Bad line in objects.inv:\n\n{line}") + line = objects_file.readline() + if not line.startswith(b"# Version: "): + raise RuntimeError(f"Bad line in objects.inv:\n\n{line}") + line = objects_file.readline() + if line != b"# The remainder of this file is compressed using zlib.\n": + raise RuntimeError(f"Bad line in objects.inv:\n\n{line}") + zlib_data = objects_file.read() + objects_text = zlib.decompress(zlib_data).decode().strip() + objects_list = [parse_docobject(line) for line in objects_text.split("\n")] + doc_urls = { + obj.name: "https://networkx.org/documentation/stable/" + obj.uri + for obj in objects_list + } + if len(objects_list) != len(doc_urls): + raise RuntimeError("Oops; duplicate names found in objects.inv") + + def get_payload(info, **kwargs): + path = "networkx." 
+ info.networkx_path
+        subpath, name = path.rsplit(".", 1)
+        # Many objects are referred to in modules above where they are defined.
+        while subpath:
+            path = f"{subpath}.{name}"
+            if path in doc_urls:
+                return f'<a href="{doc_urls[path]}">{name}</a>'
+            subpath = subpath.rsplit(".", 1)[0]
+        warn(f"Unable to find URL for {name!r}: {path}", stacklevel=0)
+        return name
+
+    def get_payload_internal(keys):
+        path = "networkx." + ".".join(keys)
+        name = keys[-1]
+        if path in doc_urls:
+            return f'<a href="{doc_urls[path]}">{name}</a>'
+        path2 = "reference/" + "/".join(keys)
+        if path2 in doc_urls:
+            return f'<a href="{doc_urls[path2]}">{name}</a>'
+        if path in MANUAL_OBJECT_URLS:
+            return f'<a href="{MANUAL_OBJECT_URLS[path]}">{name}</a>'
+        warn(f"Unable to find URL for {name!r}: {path}", stacklevel=0)
+        return name
+
+    readme_file.seek(0)
+    text = readme_file.read()
+    tree = create_tree(get_payload=get_payload)
+    # Algorithms
+    match = re.search(
+        r"### .Algorithms(?P<preamble>.*?)<pre>\n(?P<body>.*?)\n</pre>",
+        text,
+        re.DOTALL,
+    )
+    if not match:
+        raise RuntimeError("Algorithms section not found!")
+    lines = []
+    for key, val in tree["algorithms"].items():
+        lines.append(get_payload_internal(("algorithms", key)))
+        lines.extend(
+            tree_lines(
+                val,
+                parents=("algorithms", key),
+                get_payload_internal=get_payload_internal,
+            )
+        )
+    text = replace_body(text, match, "\n".join(lines))
+    # Generators
+    match = re.search(
+        r"### .Generators(?P<preamble>.*?)<pre>\n(?P<body>.*?)\n</pre>",
+        text,
+        re.DOTALL,
+    )
+    if not match:
+        raise RuntimeError("Generators section not found!")
+    lines = []
+    for key, val in tree["generators"].items():
+        lines.append(get_payload_internal(("generators", key)))
+        lines.extend(
+            tree_lines(
+                val,
+                parents=("generators", key),
+                get_payload_internal=get_payload_internal,
+            )
+        )
+    text = replace_body(text, match, "\n".join(lines))
+    # Other
+    match = re.search(
+        r"### Other\n(?P<preamble>.*?)<pre>\n(?P<body>.*?)\n</pre>",
+        text,
+        re.DOTALL,
+    )
+    if not match:
+        raise RuntimeError("Other section not found!")
+    lines = []
+    for key, val in tree.items():
+        if key in {"algorithms", "generators"}:
+            continue
+        lines.append(get_payload_internal((key,)))
+        lines.extend(
+            tree_lines(val, parents=(key,), get_payload_internal=get_payload_internal)
+        )
+    text = replace_body(text, match, "\n".join(lines))
+    # Now overwrite README.md
+    readme_file.truncate(0)
+    readme_file.write(text)
+    return text
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        "Update README.md to show NetworkX functions implemented by nx-cugraph"
+    )
+    parser.add_argument("readme_filename", help="Path to the README.md file")
+    parser.add_argument(
+        "networkx_objects", help="Path to the objects.inv file from networkx docs"
+    )
+    args = parser.parse_args()
+    with Path(args.readme_filename).open("a+") as readme_file:
+        main(readme_file, args.networkx_objects)