From f35c68fc94b0e5cfa524975767cd3f14b784bfdb Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Thu, 1 Feb 2024 02:42:48 -0600 Subject: [PATCH] Updates nx-cugraph README.md with latest algos, adds script to auto-update README.md from nx_cugraph metadata directly. --- python/nx-cugraph/Makefile | 6 +- python/nx-cugraph/README.md | 138 +++++++++++----- .../nx_cugraph/scripts/print_table.py | 2 +- python/nx-cugraph/update_readme.py | 150 ++++++++++++++++++ 4 files changed, 254 insertions(+), 42 deletions(-) create mode 100644 python/nx-cugraph/update_readme.py diff --git a/python/nx-cugraph/Makefile b/python/nx-cugraph/Makefile index 6e1b98ee6e9..bd0d3c867e4 100644 --- a/python/nx-cugraph/Makefile +++ b/python/nx-cugraph/Makefile @@ -2,7 +2,7 @@ SHELL= /bin/bash .PHONY: all -all: plugin-info lint +all: plugin-info lint readme .PHONY: lint lint: @@ -15,3 +15,7 @@ lint-update: .PHONY: plugin-info plugin-info: python _nx_cugraph/__init__.py + +.PHONY: readme +readme: + python update_readme.py README.md diff --git a/python/nx-cugraph/README.md b/python/nx-cugraph/README.md index f6a9aac1088..2a590fcecb7 100644 --- a/python/nx-cugraph/README.md +++ b/python/nx-cugraph/README.md @@ -89,48 +89,106 @@ interface to its CUDA-based graph analytics library) and [CuPy](https://cupy.dev/) (a GPU-accelerated array library) to NetworkX's familiar and easy-to-use API. -Below is the list of algorithms (many listed using pylibcugraph names), -available today in pylibcugraph or implemented using CuPy, that are or will be +Below is the list of algorithms that are currently supported or planned to be supported in nx-cugraph. -| feature/algo | release/target version | -| ----- | ----- | -| analyze_clustering_edge_cut | ? | -| analyze_clustering_modularity | ? | -| analyze_clustering_ratio_cut | ? | -| balanced_cut_clustering | ? | -| betweenness_centrality | 23.10 | -| bfs | ? | -| connected_components | 23.12 | -| core_number | ? | -| degree_centrality | 23.12 | -| ecg | ? 
| -| edge_betweenness_centrality | 23.10 | -| ego_graph | ? | -| eigenvector_centrality | 23.12 | -| get_two_hop_neighbors | ? | -| hits | 23.12 | -| in_degree_centrality | 23.12 | -| induced_subgraph | ? | -| jaccard_coefficients | ? | -| katz_centrality | 23.12 | -| k_core | ? | -| k_truss_subgraph | 23.12 | -| leiden | ? | -| louvain | 23.10 | -| node2vec | ? | -| out_degree_centrality | 23.12 | -| overlap_coefficients | ? | -| pagerank | 23.12 | -| personalized_pagerank | ? | -| sorensen_coefficients | ? | -| spectral_modularity_maximization | ? | -| sssp | 23.12 | -| strongly_connected_components | ? | -| triangle_count | ? | -| uniform_neighbor_sample | ? | -| uniform_random_walks | ? | -| weakly_connected_components | ? | +| feature/algo | release/target version | +|:-------------------------------------|:-------------------------| +| ancestors | 24.02 | +| average_clustering | 24.02 | +| barbell_graph | 23.12 | +| betweenness_centrality | 23.10 | +| bfs_edges | 24.02 | +| bfs_layers | 24.02 | +| bfs_predecessors | 24.02 | +| bfs_successors | 24.02 | +| bfs_tree | 24.02 | +| bull_graph | 23.12 | +| caveman_graph | 23.12 | +| chvatal_graph | 23.12 | +| circular_ladder_graph | 23.12 | +| clustering | 24.02 | +| complement | 24.02 | +| complete_bipartite_graph | 23.12 | +| complete_graph | 23.12 | +| complete_multipartite_graph | 23.12 | +| connected_components | 23.12 | +| core_number | 24.02 | +| cubical_graph | 23.12 | +| cycle_graph | 23.12 | +| davis_southern_women_graph | 23.12 | +| degree_centrality | 23.12 | +| desargues_graph | 23.12 | +| descendants | 24.02 | +| descendants_at_distance | 24.02 | +| diamond_graph | 23.12 | +| dodecahedral_graph | 23.12 | +| edge_betweenness_centrality | 23.10 | +| eigenvector_centrality | 23.12 | +| empty_graph | 23.12 | +| florentine_families_graph | 23.12 | +| from_pandas_edgelist | 23.12 | +| from_scipy_sparse_array | 23.12 | +| frucht_graph | 23.12 | +| generic_bfs_edges | 24.02 | +| heawood_graph | 23.12 | +| hits 
| 23.12 | +| house_graph | 23.12 | +| house_x_graph | 23.12 | +| icosahedral_graph | 23.12 | +| in_degree_centrality | 23.12 | +| is_arborescence | 24.02 | +| is_bipartite | 24.02 | +| is_branching | 24.02 | +| is_connected | 23.12 | +| is_forest | 24.02 | +| is_isolate | 23.10 | +| is_strongly_connected | 24.02 | +| is_tree | 24.02 | +| is_weakly_connected | 24.02 | +| isolates | 23.10 | +| k_truss | 23.12 | +| karate_club_graph | 23.12 | +| katz_centrality | 23.12 | +| krackhardt_kite_graph | 23.12 | +| ladder_graph | 23.12 | +| leiden | ? | +| les_miserables_graph | 23.12 | +| lollipop_graph | 23.12 | +| louvain_communities | 23.10 | +| moebius_kantor_graph | 23.12 | +| node_connected_component | 23.12 | +| null_graph | 23.12 | +| number_connected_components | 23.12 | +| number_of_isolates | 23.10 | +| number_strongly_connected_components | 24.02 | +| number_weakly_connected_components | 24.02 | +| octahedral_graph | 23.12 | +| out_degree_centrality | 23.12 | +| overall_reciprocity | 24.02 | +| pagerank | 23.12 | +| pappus_graph | 23.12 | +| path_graph | 23.12 | +| petersen_graph | 23.12 | +| reciprocity | 24.02 | +| reverse | 24.02 | +| sedgewick_maze_graph | 23.12 | +| single_source_shortest_path_length | 23.12 | +| single_target_shortest_path_length | 23.12 | +| star_graph | 23.12 | +| strongly_connected_components | 24.02 | +| tadpole_graph | 23.12 | +| tetrahedral_graph | 23.12 | +| transitivity | 24.02 | +| triangles | 24.02 | +| trivial_graph | 23.12 | +| truncated_cube_graph | 23.12 | +| truncated_tetrahedron_graph | 23.12 | +| turan_graph | 23.12 | +| tutte_graph | 23.12 | +| uniform_neighbor_sample | ? | +| weakly_connected_components | 24.02 | +| wheel_graph | 23.12 | To request nx-cugraph backend support for a NetworkX API that is not listed above, visit the [cuGraph GitHub repo](https://github.com/rapidsai/cugraph). 
diff --git a/python/nx-cugraph/nx_cugraph/scripts/print_table.py b/python/nx-cugraph/nx_cugraph/scripts/print_table.py index 7e69de63dc1..be9cfa31c48 100755 --- a/python/nx-cugraph/nx_cugraph/scripts/print_table.py +++ b/python/nx-cugraph/nx_cugraph/scripts/print_table.py @@ -59,7 +59,7 @@ def main(path_to_info=None, *, file=sys.stdout): if path_to_info is None: path_to_info = get_path_to_info(version_added_sep=".") lines = ["networkx_path,dispatch_name,version_added,plc,is_incomplete,is_different"] - lines.extend(",".join(info) for info in path_to_info.values()) + lines.extend(",".join([str(i) for i in info]) for info in path_to_info.values()) text = "\n".join(lines) print(text, file=file) return text diff --git a/python/nx-cugraph/update_readme.py b/python/nx-cugraph/update_readme.py new file mode 100644 index 00000000000..ecc92eca9dc --- /dev/null +++ b/python/nx-cugraph/update_readme.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import io
import re
import sys

import pandas as pd

from nx_cugraph.scripts.print_table import main as nxcg_print_table

# Exact header row expected in the README table, plus a regex that tolerates
# arbitrary intra-cell padding (the regenerated table uses varying widths).
table_header_string = "| feature/algo | release/target version |"
table_header_patt = re.compile(r"\| feature/algo[\s]+\| release/target version[\s]+\|")

# Column-name mapping: nx_cugraph metadata names vs. README table headings.
nxcg_algo_col_name = "dispatch_name"
readme_algo_col_name = "feature/algo"
nxcg_version_col_name = "version_added"
readme_version_col_name = "release/target version"


def get_current_nxcg_data():
    """
    Return a DataFrame containing all meta-data from the current nx_cugraph
    package, parsed from the CSV emitted by nx_cugraph.scripts.print_table.
    """
    buf = io.StringIO()
    nxcg_print_table(file=buf)
    buf.seek(0)
    # Read versions as strings so values like "23.10" are not parsed as
    # floats (which would reformat them, e.g. dropping a trailing zero).
    return pd.read_csv(buf, dtype={nxcg_version_col_name: str})


def get_readme_sections(readme_file_name):
    """
    Return the README as three lists of strings:
    (before_table, table, after_table).

    Raises
    ------
    ValueError
        If readme_file_name is not a markdown (.md) file.
    RuntimeError
        If the expected table header cannot be found in the file.
    """
    # Explicit validation instead of assert: asserts vanish under "python -O".
    if not readme_file_name.endswith(".md"):
        raise ValueError(f"expected a markdown (.md) file, got: {readme_file_name}")

    before_table = []
    table = []
    after_table = []

    with open(readme_file_name) as fd:
        lines = iter([ln.rstrip() for ln in fd.readlines()])
        line = next(lines, None)

        # everything before the markdown table
        while line is not None and not table_header_patt.fullmatch(line):
            before_table.append(line)
            line = next(lines, None)

        if line is not None and table_header_patt.fullmatch(line):
            # table body: the contiguous run of lines starting with "|"
            while line is not None and line.startswith("|"):
                table.append(line)
                line = next(lines, None)

            # everything after the table
            while line is not None:
                after_table.append(line)
                line = next(lines, None)

        else:
            raise RuntimeError(
                "Could not find start of table matching "
                f"'{table_header_string}' in {readme_file_name}"
            )

    return (before_table, table, after_table)


def _split_markdown_row(row):
    """
    Split one markdown table row into a list of stripped cell strings.

    Removes the leading/trailing "|" borders, then splits on the interior
    "|" delimiters. Unlike filtering out falsy fields, this preserves empty
    cells so columns cannot silently shift for a row with a blank value.
    """
    return [f.strip() for f in row.strip().strip("|").split("|")]


def get_readme_table_data(table_lines):
    """
    Return a DataFrame containing all meta-data extracted from the markdown
    table text passed in as a list of strings.

    Raises
    ------
    RuntimeError
        If the second line is not a markdown header/body separator.
    """
    csv_buf = io.StringIO()
    lines = iter(table_lines)
    line = next(lines, None)

    # process header
    print(*_split_markdown_row(line), sep=",", file=csv_buf)

    # Consume the header underline, checking it explicitly (an assert would
    # be stripped under "python -O").
    line = next(lines, None)
    if not (line.startswith("|:-") or line.startswith("| -")):
        raise RuntimeError(f"Unexpected markdown table separator line: {line!r}")

    # Read the table body
    line = next(lines, None)
    while line is not None and line.startswith("|"):
        print(*_split_markdown_row(line), sep=",", file=csv_buf)
        line = next(lines, None)

    csv_buf.seek(0)
    # Keep versions as strings (e.g. "23.10") rather than floats.
    return pd.read_csv(csv_buf, dtype={readme_version_col_name: str})


def main(readme_file_name="README.md"):
    """
    Update the markdown table in readme_file_name in place with the latest
    algorithm/version metadata from the installed nx_cugraph package.
    """
    nxcg_data = get_current_nxcg_data()
    (before_table_lines, table_lines, after_table_lines) = get_readme_sections(
        readme_file_name
    )
    readme_data = get_readme_table_data(table_lines)

    # Use only the data needed for the README, renamed to the README headings.
    nxcg_data_for_readme = nxcg_data[
        [nxcg_algo_col_name, nxcg_version_col_name]
    ].rename(
        {
            nxcg_algo_col_name: readme_algo_col_name,
            nxcg_version_col_name: readme_version_col_name,
        },
        axis=1,
    )

    # Update the readme data with the latest nxcg data. The outer merge adds
    # new algos and keeps README-only rows; current nxcg version values take
    # precedence over any old version values already in the README.
    merged = readme_data.merge(
        nxcg_data_for_readme,
        how="outer",
        on=readme_algo_col_name,
    )
    x = readme_version_col_name + "_x"  # version column from the README
    y = readme_version_col_name + "_y"  # version column from nx_cugraph
    merged[readme_version_col_name] = merged[y].fillna(merged[x])
    merged.drop([x, y], axis=1, inplace=True)
    merged.sort_values(by=readme_algo_col_name, inplace=True)

    # Rewrite the README with the updated table
    with open(readme_file_name, "w") as fd:
        print("\n".join(before_table_lines), file=fd)
        print(merged.to_markdown(index=False), file=fd)
        print("\n".join(after_table_lines), file=fd)


if __name__ == "__main__":
    # Default to README.md when no path argument is given; previously
    # sys.argv[1] raised IndexError despite main() having a default.
    main(*sys.argv[1:2])