From 0b80ccfef123b807d953da2050de5baa80eb3595 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Sat, 24 Aug 2024 13:28:19 -0700 Subject: [PATCH] Some clean up; try to make tests in CI pass --- .../algorithms/community/louvain.py | 2 +- python/nx-cugraph/nx_cugraph/classes/graph.py | 2 +- python/nx-cugraph/nx_cugraph/classes/zero.py | 17 +- python/nx-cugraph/nx_cugraph/convert.py | 23 +- .../nx-cugraph/nx_cugraph/convert_matrix.py | 2 +- python/nx-cugraph/nx_cugraph/interface.py | 219 +++++++++++------- python/nx-cugraph/nx_cugraph/relabel.py | 6 +- python/nx-cugraph/run_nx_tests.sh | 4 + 8 files changed, 167 insertions(+), 108 deletions(-) diff --git a/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py index 482bd32001e..52c512c454d 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py @@ -81,7 +81,7 @@ def _louvain_communities( node_ids, clusters, modularity = plc.louvain( resource_handle=plc.ResourceHandle(), graph=G._get_plc_graph(weight, 1, dtype), - max_level=max_level, # TODO: add this parameter to NetworkX + max_level=max_level, threshold=threshold, resolution=resolution, do_expensive_check=False, diff --git a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py index 74b2a758f31..2abf0cd6502 100644 --- a/python/nx-cugraph/nx_cugraph/classes/graph.py +++ b/python/nx-cugraph/nx_cugraph/classes/graph.py @@ -335,7 +335,7 @@ def __new__(cls, incoming_graph_data=None, **attr) -> Graph: else: raise NotImplementedError new_graph.graph.update(attr) - # XXX: we could return ZeroGraph here, but let's not for now + # We could return ZeroGraph here (if configured), but let's not for now return new_graph ################# diff --git a/python/nx-cugraph/nx_cugraph/classes/zero.py b/python/nx-cugraph/nx_cugraph/classes/zero.py index 4c80ea8bcbc..d3968766816 100644 --- a/python/nx-cugraph/nx_cugraph/classes/zero.py +++ b/python/nx-cugraph/nx_cugraph/classes/zero.py @@ -34,8 +34,7 @@ # `collections.UserDict` was the preferred way to subclass dict, but now # subclassing dict directly is much better supported and should work here. -# TODO: explore if having this class is strictly necessary, but we may choose -# to keep it anyway out of an abundance of caution. +# This class should only be necessary if the user clears the cache manually. class _ZeroGraphCache(dict): """Cache that ensures ZeroGraph will reify into a NetworkX graph when cleared.""" @@ -97,7 +96,12 @@ def _cugraph(self): if (Gcg := cache.get(_CACHE_KEY)) is not None: return Gcg if self.__dict__["_node"] is None: - raise RuntimeError("XXX") + raise RuntimeError( + f"{type(self).__name__} cannot be converted to the GPU, because it is " + "not on the CPU! This is not supposed to be possible. If you believe " + "you have found a bug, please report a minimum reproducible example to " + "https://github.com/rapidsai/cugraph/issues/new/choose" + ) try: Gcg = nxcg.from_networkx( self, preserve_edge_attrs=True, preserve_node_attrs=True @@ -115,6 +119,8 @@ def _cugraph(self, val): if (cache := getattr(self, "__networkx_cache__", None)) is None: # Should we warn? return + # TODO: pay close attention to when we should clear the cache, since + # this may or may not be a mutation. cache = cache.setdefault("backends", {}).setdefault("cugraph", {}) if val is None: cache.pop(_CACHE_KEY, None) @@ -126,8 +132,9 @@ def _cugraph(self, val): @nx.Graph.name.setter def name(self, s): - # Don't clear the cache when setting the name, since `.graph` is shared - # XXX + # Don't clear the cache when setting the name, since `.graph` is shared. + # There is a very small risk here for the cache to become (slightly) + # insconsistent if graphs from other backends are cached. self.graph["name"] = s @classmethod diff --git a/python/nx-cugraph/nx_cugraph/convert.py b/python/nx-cugraph/nx_cugraph/convert.py index e2b844c00ef..1abe112ea47 100644 --- a/python/nx-cugraph/nx_cugraph/convert.py +++ b/python/nx-cugraph/nx_cugraph/convert.py @@ -66,10 +66,12 @@ def _iterate_values(graph, adj, is_dicts, func): return func(it), False -def _not_implemented_error_diaper(func): - """Catch and convert exceptions to ``NotImplementedError`` +# Consider adding this to `utils` if it is useful elsewhere +def _fallback_decorator(func): + """Catch and convert exceptions to ``NotImplementedError``; use as a decorator. - ``nx.NetworkXError`` are raised without being converted. + ``nx.NetworkXError`` are raised without being converted. This allows + falling back to other backends if, for example, conversion to GPU failed. """ @functools.wraps(func) @@ -84,7 +86,7 @@ def inner(*args, **kwargs): return inner -@_not_implemented_error_diaper +@_fallback_decorator def from_networkx( graph: nx.Graph, edge_attrs: AttrKey | dict[AttrKey, EdgeValue | None] | None = None, @@ -182,7 +184,12 @@ def from_networkx( if graph._is_on_gpu: return graph._cugraph if not graph._is_on_cpu: - raise RuntimeError("TODO") + raise RuntimeError( + f"{type(graph).__name__} cannot be converted to the GPU, because it is " + "not on the CPU! This is not supposed to be possible. If you believe " + "you have found a bug, please report a minimum reproducible example to " + "https://github.com/rapidsai/cugraph/issues/new/choose" + ) if _nxver >= (3, 4): cache_key = _get_cache_key( edge_attrs=edge_attrs, @@ -547,7 +554,9 @@ def func(it, edge_attr=edge_attr, dtype=dtype): if preserve_graph_attrs: rv.graph.update(graph.graph) # deepcopy? if _nxver >= (3, 4) and isinstance(graph, ZeroGraph) and cache is not None: - # Make sure this conversion is added to the cache + # Make sure this conversion is added to the cache, and make all of + # our graphs share the same `.graph` attribute for consistency. + rv.graph = graph.graph _set_to_cache(cache, cache_key, rv) return rv @@ -753,7 +762,7 @@ def _to_undirected_graph( raise TypeError -@networkx_algorithm(version_added="24.08") +@networkx_algorithm(version_added="24.08", fallback=True) def from_dict_of_lists(d, create_using=None): from .generators._utils import _create_using_class diff --git a/python/nx-cugraph/nx_cugraph/convert_matrix.py b/python/nx-cugraph/nx_cugraph/convert_matrix.py index e616461b0d0..54975902861 100644 --- a/python/nx-cugraph/nx_cugraph/convert_matrix.py +++ b/python/nx-cugraph/nx_cugraph/convert_matrix.py @@ -163,7 +163,7 @@ def from_pandas_edgelist( return G -@networkx_algorithm(version_added="23.12") +@networkx_algorithm(version_added="23.12", fallback=True) def from_scipy_sparse_array( A, parallel_edges=False, create_using=None, edge_attribute="weight" ): diff --git a/python/nx-cugraph/nx_cugraph/interface.py b/python/nx-cugraph/nx_cugraph/interface.py index 7110a629fde..8910e463904 100644 --- a/python/nx-cugraph/nx_cugraph/interface.py +++ b/python/nx-cugraph/nx_cugraph/interface.py @@ -70,19 +70,30 @@ def key(testpath): return (testname, frozenset({classname, filename})) return (testname, frozenset({filename})) + zero = _nxver >= (3, 3) and nx.config.backends.cugraph.zero + fallback = zero or nx.utils.backends._dispatchable._fallback_to_nx + # Reasons for xfailing + # For nx version <= 3.1 no_weights = "weighted implementation not currently supported" no_multigraph = "multigraphs not currently supported" + # For nx version <= 3.2 + nx_cugraph_in_test_setup = ( + "nx-cugraph Graph is incompatible in test setup in nx versions < 3.3" + ) + # For all versions louvain_different = "Louvain may be different due to RNG" - no_string_dtype = "string edge values not currently supported" sssp_path_different = "sssp may choose a different valid path" + tuple_elements_preferred = "elements are tuples instead of lists" + no_mixed_dtypes_for_nodes = ( + # This one is tricky b/c we don't raise; all dtypes are treated as str + "mixed dtypes (str, int, float) for single node property not supported" + ) + # These shouldn't fail if using ZeroGraph or falling back to networkx + no_string_dtype = "string edge values not currently supported" no_object_dtype_for_edges = ( "Edges don't support object dtype (lists, strings, etc.)" ) - tuple_elements_preferred = "elements are tuples instead of lists" - nx_cugraph_in_test_setup = ( - "nx-cugraph Graph is incompatible in test setup in nx versions < 3.3" - ) xfail = { # This is removed while strongly_connected_components() is not @@ -106,38 +117,6 @@ def key(testpath): "test_cycles.py:TestMinimumCycleBasis." "test_gh6787_and_edge_attribute_names" ): sssp_path_different, - key( - "test_graph_hashing.py:test_isomorphic_edge_attr" - ): no_object_dtype_for_edges, - key( - "test_graph_hashing.py:test_isomorphic_edge_attr_and_node_attr" - ): no_object_dtype_for_edges, - key( - "test_graph_hashing.py:test_isomorphic_edge_attr_subgraph_hash" - ): no_object_dtype_for_edges, - key( - "test_graph_hashing.py:" - "test_isomorphic_edge_attr_and_node_attr_subgraph_hash" - ): no_object_dtype_for_edges, - key( - "test_summarization.py:TestSNAPNoEdgeTypes.test_summary_graph" - ): no_object_dtype_for_edges, - key( - "test_summarization.py:TestSNAPUndirected.test_summary_graph" - ): no_object_dtype_for_edges, - key( - "test_summarization.py:TestSNAPDirected.test_summary_graph" - ): no_object_dtype_for_edges, - key("test_gexf.py:TestGEXF.test_relabel"): no_object_dtype_for_edges, - key( - "test_gml.py:TestGraph.test_parse_gml_cytoscape_bug" - ): no_object_dtype_for_edges, - key("test_gml.py:TestGraph.test_parse_gml"): no_object_dtype_for_edges, - key("test_gml.py:TestGraph.test_read_gml"): no_object_dtype_for_edges, - key("test_gml.py:TestGraph.test_data_types"): no_object_dtype_for_edges, - key( - "test_gml.py:TestPropertyLists.test_reading_graph_with_list_property" - ): no_object_dtype_for_edges, key( "test_relabel.py:" "test_relabel_preserve_node_order_partial_mapping_with_copy_false" @@ -146,42 +125,105 @@ def key(testpath): "test_gml.py:" "TestPropertyLists.test_reading_graph_with_single_element_list_property" ): tuple_elements_preferred, - key( - "test_relabel.py:" - "TestRelabel.test_relabel_multidigraph_inout_merge_nodes" - ): no_string_dtype, - key( - "test_relabel.py:TestRelabel.test_relabel_multigraph_merge_inplace" - ): no_string_dtype, - key( - "test_relabel.py:TestRelabel.test_relabel_multidigraph_merge_inplace" - ): no_string_dtype, - key( - "test_relabel.py:TestRelabel.test_relabel_multidigraph_inout_copy" - ): no_string_dtype, - key( - "test_relabel.py:TestRelabel.test_relabel_multigraph_merge_copy" - ): no_string_dtype, - key( - "test_relabel.py:TestRelabel.test_relabel_multidigraph_merge_copy" - ): no_string_dtype, - key( - "test_relabel.py:TestRelabel.test_relabel_multigraph_nonnumeric_key" - ): no_string_dtype, - key("test_contraction.py:test_multigraph_path"): no_object_dtype_for_edges, - key( - "test_contraction.py:test_directed_multigraph_path" - ): no_object_dtype_for_edges, - key( - "test_contraction.py:test_multigraph_blockmodel" - ): no_object_dtype_for_edges, - key( - "test_summarization.py:TestSNAPUndirectedMulti.test_summary_graph" - ): no_string_dtype, - key( - "test_summarization.py:TestSNAPDirectedMulti.test_summary_graph" - ): no_string_dtype, } + if not fallback: + xfail.update( + { + key( + "test_graph_hashing.py:test_isomorphic_edge_attr" + ): no_object_dtype_for_edges, + key( + "test_graph_hashing.py:test_isomorphic_edge_attr_and_node_attr" + ): no_object_dtype_for_edges, + key( + "test_graph_hashing.py:test_isomorphic_edge_attr_subgraph_hash" + ): no_object_dtype_for_edges, + key( + "test_graph_hashing.py:" + "test_isomorphic_edge_attr_and_node_attr_subgraph_hash" + ): no_object_dtype_for_edges, + key( + "test_summarization.py:TestSNAPNoEdgeTypes.test_summary_graph" + ): no_object_dtype_for_edges, + key( + "test_summarization.py:TestSNAPUndirected.test_summary_graph" + ): no_object_dtype_for_edges, + key( + "test_summarization.py:TestSNAPDirected.test_summary_graph" + ): no_object_dtype_for_edges, + key( + "test_gexf.py:TestGEXF.test_relabel" + ): no_object_dtype_for_edges, + key( + "test_gml.py:TestGraph.test_parse_gml_cytoscape_bug" + ): no_object_dtype_for_edges, + key( + "test_gml.py:TestGraph.test_parse_gml" + ): no_object_dtype_for_edges, + key( + "test_gml.py:TestGraph.test_read_gml" + ): no_object_dtype_for_edges, + key( + "test_gml.py:TestGraph.test_data_types" + ): no_object_dtype_for_edges, + key( + "test_gml.py:" + "TestPropertyLists.test_reading_graph_with_list_property" + ): no_object_dtype_for_edges, + key( + "test_relabel.py:" + "TestRelabel.test_relabel_multidigraph_inout_merge_nodes" + ): no_string_dtype, + key( + "test_relabel.py:" + "TestRelabel.test_relabel_multigraph_merge_inplace" + ): no_string_dtype, + key( + "test_relabel.py:" + "TestRelabel.test_relabel_multidigraph_merge_inplace" + ): no_string_dtype, + key( + "test_relabel.py:" + "TestRelabel.test_relabel_multidigraph_inout_copy" + ): no_string_dtype, + key( + "test_relabel.py:TestRelabel.test_relabel_multigraph_merge_copy" + ): no_string_dtype, + key( + "test_relabel.py:" + "TestRelabel.test_relabel_multidigraph_merge_copy" + ): no_string_dtype, + key( + "test_relabel.py:" + "TestRelabel.test_relabel_multigraph_nonnumeric_key" + ): no_string_dtype, + key( + "test_contraction.py:test_multigraph_path" + ): no_object_dtype_for_edges, + key( + "test_contraction.py:test_directed_multigraph_path" + ): no_object_dtype_for_edges, + key( + "test_contraction.py:test_multigraph_blockmodel" + ): no_object_dtype_for_edges, + key( + "test_summarization.py:" + "TestSNAPUndirectedMulti.test_summary_graph" + ): no_string_dtype, + key( + "test_summarization.py:TestSNAPDirectedMulti.test_summary_graph" + ): no_string_dtype, + } + ) + else: + xfail.update( + { + key( + "test_gml.py:" + "TestPropertyLists.test_reading_graph_with_list_property" + ): no_mixed_dtypes_for_nodes, + } + ) if _nxver <= (3, 2): xfail.update( @@ -337,22 +379,23 @@ def key(testpath): "Louvain does not support seed parameter" ) if _nxver >= (3, 2): - xfail.update( - { - key( - "test_convert_pandas.py:TestConvertPandas." - "test_from_edgelist_multi_attr_incl_target" - ): no_string_dtype, - key( - "test_convert_pandas.py:TestConvertPandas." - "test_from_edgelist_multidigraph_and_edge_attr" - ): no_string_dtype, - key( - "test_convert_pandas.py:TestConvertPandas." - "test_from_edgelist_int_attr_name" - ): no_string_dtype, - } - ) + if not fallback: + xfail.update( + { + key( + "test_convert_pandas.py:TestConvertPandas." + "test_from_edgelist_multi_attr_incl_target" + ): no_string_dtype, + key( + "test_convert_pandas.py:TestConvertPandas." + "test_from_edgelist_multidigraph_and_edge_attr" + ): no_string_dtype, + key( + "test_convert_pandas.py:TestConvertPandas." + "test_from_edgelist_int_attr_name" + ): no_string_dtype, + } + ) if _nxver[1] == 2: different_iteration_order = "Different graph data iteration order" xfail.update( diff --git a/python/nx-cugraph/nx_cugraph/relabel.py b/python/nx-cugraph/nx_cugraph/relabel.py index 75d75b2b624..fe18183dd45 100644 --- a/python/nx-cugraph/nx_cugraph/relabel.py +++ b/python/nx-cugraph/nx_cugraph/relabel.py @@ -36,11 +36,7 @@ def relabel_nodes(G, mapping, copy=True): "Using `copy=False` is invalid when using a NetworkX graph " "as input to `nx_cugraph.relabel_nodes`" ) - try: - G = nxcg.from_networkx(G, preserve_all_attrs=True) - except Exception as exc: - # XXX: this diaper pattern may be generally useful for 'zero'; what's best? - raise NotImplementedError("TODO") from exc + G = nxcg.from_networkx(G, preserve_all_attrs=True) else: zero = False diff --git a/python/nx-cugraph/run_nx_tests.sh b/python/nx-cugraph/run_nx_tests.sh index bceec53b7d5..b67343c7d3c 100755 --- a/python/nx-cugraph/run_nx_tests.sh +++ b/python/nx-cugraph/run_nx_tests.sh @@ -18,6 +18,10 @@ # testing takes longer. Without it, tests will xfail when encountering a # function that we don't implement. # +# NX_CUGRAPH_ZERO, {"True", "False"}, default is "True" +# Whether to use `nxcg.ZeroGraph` as the nx_cugraph backend graph. +# A ZeroGraph should be a compatible NetworkX graph, so fewer tests should fail. +# # Coverage of `nx_cugraph.algorithms` is reported and is a good sanity check # that algorithms run.