From f5d008afddb8ee3d8d6f9c9e60f88eeceb079e5e Mon Sep 17 00:00:00 2001
From: acostadon <dacosta@nvidia.com>
Date: Wed, 27 Sep 2023 14:53:16 -0400
Subject: [PATCH 1/6] fixed force atlas to work with string vertex ids,
 added tests, and removed dependence on mtx files in test.

---
 .../cugraph/layout/force_atlas2_wrapper.pyx   |  8 ++-
 .../cugraph/tests/layout/test_force_atlas2.py | 71 ++++++++++++++++---
 2 files changed, 66 insertions(+), 13 deletions(-)

diff --git a/python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx b/python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx
index 4258be3ef71..5a2784e2363 100644
--- a/python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx
+++ b/python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx
@@ -56,9 +56,11 @@ def force_atlas2(input_graph,
     if not input_graph.edgelist:
         input_graph.view_edge_list()
 
-    # FIXME: This implementation assumes that the number of vertices
-    # is the max vertex ID + 1 which is not always the case.
-    num_verts = input_graph.nodes().max() + 1
+    # this code allows handling of renumbered graphs
+    if input_graph.is_renumbered():
+        num_verts = input_graph.renumber_map.df_internal_to_external['id'].max()+1
+    else:
+        num_verts = input_graph.nodes().max() + 1
     num_edges = len(input_graph.edgelist.edgelist_df['src'])
 
     cdef GraphCOOView[int,int,float] graph_float
diff --git a/python/cugraph/cugraph/tests/layout/test_force_atlas2.py b/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
index 495a2d945c0..9eb8309fb67 100644
--- a/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
+++ b/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
@@ -13,13 +13,49 @@
 
 import time
 import pytest
-import scipy.io
-from sklearn.manifold import trustworthiness
 
 import cudf
 import cugraph
+from cugraph.structure import number_map
 from cugraph.internals import GraphBasedDimRedCallback
-from cugraph.datasets import karate, polbooks, dolphins, netscience
+from sklearn.manifold import trustworthiness
+import scipy.io
+from cugraph.datasets import (
+    karate,
+    polbooks,
+    dolphins,
+    netscience,
+    dining_prefs,
+)
+
+# Temporarily suppress warnings till networkX fixes deprecation warnings
+# (Using or importing the ABCs from 'collections' instead of from
+# 'collections.abc' is deprecated, and in 3.8 it will stop working) for
+# python 3.7.  Also, these import fa2 and import networkx need to be
+# relocated in the third-party group once this gets fixed.
+
+
+# This method renumbers a dataframe so it can be tested using Trustworthiness.
+# it converts a dataframe with string vertex ids to a renumbered int one.
+def renumbered_edgelist(df):
+    renumbered_df, num_map = number_map.NumberMap.renumber(df, "src", "dst")
+    new_df = renumbered_df[["renumbered_src", "renumbered_dst", "wgt"]]
+    column_names = {"renumbered_src": "src", "renumbered_dst": "dst"}
+    new_df = new_df.rename(columns=column_names)
+    return new_df
+
+
+# This method converts a dataframe to a sparse matrix that is required by
+# sklearn's trustworthiness to verify the layout
+def get_coo_array(edgelist):
+    coo = edgelist
+    x = max(coo["src"].max(), coo["dst"].max()) + 1
+    row = coo["src"].to_numpy()
+    col = coo["dst"].to_numpy()
+    data = coo["wgt"].to_numpy()
+    M = scipy.sparse.coo_array((data, (row, col)), shape=(x, x))
+
+    return M
 
 
 def cugraph_call(
@@ -37,11 +73,15 @@ def cugraph_call(
     strong_gravity_mode,
     gravity,
     callback=None,
+    renumber=False,
 ):
-
     G = cugraph.Graph()
+    if cu_M["src"] is not int or cu_M["dst"] is not int:
+        renumber = True
+    else:
+        renumber = False
     G.from_cudf_edgelist(
-        cu_M, source="src", destination="dst", edge_attr="wgt", renumber=False
+        cu_M, source="src", destination="dst", edge_attr="wgt", renumber=renumber
     )
 
     t1 = time.time()
@@ -66,7 +106,13 @@ def cugraph_call(
     return pos
 
 
-DATASETS = [(karate, 0.70), (polbooks, 0.75), (dolphins, 0.66), (netscience, 0.66)]
+DATASETS = [
+    (karate, 0.70),
+    (polbooks, 0.75),
+    (dolphins, 0.66),
+    (netscience, 0.66),
+    (dining_prefs, 0.50),
+]
 
 
 MAX_ITERATIONS = [500]
@@ -96,7 +142,6 @@ def on_train_end(self, positions):
 @pytest.mark.parametrize("barnes_hut_optimize", BARNES_HUT_OPTIMIZE)
 def test_force_atlas2(graph_file, score, max_iter, barnes_hut_optimize):
     cu_M = graph_file.get_edgelist()
-    dataset_path = graph_file.get_path()
     test_callback = TestCallback()
     cu_pos = cugraph_call(
         cu_M,
@@ -126,9 +171,14 @@ def test_force_atlas2(graph_file, score, max_iter, barnes_hut_optimize):
         iterations on a given graph.
     """
 
-    matrix_file = dataset_path.with_suffix(".mtx")
-    M = scipy.io.mmread(matrix_file)
-    M = M.toarray()
+    #    matrix_file = dataset_path.with_suffix(".mtx")
+    #    M = scipy.io.mmread(matrix_file)
+    #    M = M.toarray()
+    if "string" in graph_file.metadata["col_types"]:
+        df = renumbered_edgelist(graph_file.get_edgelist())
+        M = get_coo_array(df)
+    else:
+        M = get_coo_array(graph_file.get_edgelist())
     cu_trust = trustworthiness(M, cu_pos[["x", "y"]].to_pandas())
     print(cu_trust, score)
     assert cu_trust > score
@@ -205,6 +255,7 @@ def test_force_atlas2_multi_column_pos_list(
     cu_pos = cu_pos.sort_values("0_vertex")
     matrix_file = dataset_path.with_suffix(".mtx")
     M = scipy.io.mmread(matrix_file)
+    M = cugraph.structure.graph_to_csr(G)
     M = M.todense()
     cu_trust = trustworthiness(M, cu_pos[["x", "y"]].to_pandas())
     print(cu_trust, score)

From eaa7c6f21b3b1048ee2a6dea32940bd134b1b47c Mon Sep 17 00:00:00 2001
From: acostadon <dacosta@nvidia.com>
Date: Wed, 27 Sep 2023 15:57:07 -0400
Subject: [PATCH 2/6] removed multi-column test due to it being
 non-deterministic

---
 .../cugraph/tests/layout/test_force_atlas2.py | 80 ++-----------------
 1 file changed, 6 insertions(+), 74 deletions(-)

diff --git a/python/cugraph/cugraph/tests/layout/test_force_atlas2.py b/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
index 9eb8309fb67..025b5213f77 100644
--- a/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
+++ b/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
@@ -114,6 +114,12 @@ def cugraph_call(
     (dining_prefs, 0.50),
 ]
 
+DATASETS2 = [
+    (polbooks, 0.75),
+    (dolphins, 0.66),
+    (netscience, 0.66),
+]
+
 
 MAX_ITERATIONS = [500]
 BARNES_HUT_OPTIMIZE = [False, True]
@@ -171,9 +177,6 @@ def test_force_atlas2(graph_file, score, max_iter, barnes_hut_optimize):
         iterations on a given graph.
     """
 
-    #    matrix_file = dataset_path.with_suffix(".mtx")
-    #    M = scipy.io.mmread(matrix_file)
-    #    M = M.toarray()
     if "string" in graph_file.metadata["col_types"]:
         df = renumbered_edgelist(graph_file.get_edgelist())
         M = get_coo_array(df)
@@ -189,74 +192,3 @@ def test_force_atlas2(graph_file, score, max_iter, barnes_hut_optimize):
     # verify `on_train_end` was only called once
     assert test_callback.on_train_end_called_count == 1
 
-
-# FIXME: this test occasionally fails - skipping to prevent CI failures but
-# need to revisit ASAP
-@pytest.mark.sg
-@pytest.mark.skip(reason="non-deterministric - needs fixing!")
-@pytest.mark.parametrize("graph_file, score", DATASETS[:-1])
-@pytest.mark.parametrize("max_iter", MAX_ITERATIONS)
-@pytest.mark.parametrize("barnes_hut_optimize", BARNES_HUT_OPTIMIZE)
-def test_force_atlas2_multi_column_pos_list(
-    graph_file, score, max_iter, barnes_hut_optimize
-):
-    cu_M = graph_file.get_edgelist()
-    dataset_path = graph_file.get_path()
-    test_callback = TestCallback()
-    pos = cugraph_call(
-        cu_M,
-        max_iter=max_iter,
-        pos_list=None,
-        outbound_attraction_distribution=True,
-        lin_log_mode=False,
-        prevent_overlapping=False,
-        edge_weight_influence=1.0,
-        jitter_tolerance=1.0,
-        barnes_hut_optimize=False,
-        barnes_hut_theta=0.5,
-        scaling_ratio=2.0,
-        strong_gravity_mode=False,
-        gravity=1.0,
-        callback=test_callback,
-    )
-
-    cu_M.rename(columns={"0": "src_0", "1": "dst_0"}, inplace=True)
-    cu_M["src_1"] = cu_M["src_0"] + 1000
-    cu_M["dst_1"] = cu_M["dst_0"] + 1000
-
-    G = cugraph.Graph()
-    G.from_cudf_edgelist(
-        cu_M, source=["src_0", "src_1"], destination=["dst_0", "dst_1"], edge_attr="2"
-    )
-
-    pos_list = cudf.DataFrame()
-    pos_list["vertex_0"] = pos["vertex"]
-    pos_list["vertex_1"] = pos_list["vertex_0"] + 1000
-    pos_list["x"] = pos["x"]
-    pos_list["y"] = pos["y"]
-
-    cu_pos = cugraph.force_atlas2(
-        G,
-        max_iter=max_iter,
-        pos_list=pos_list,
-        outbound_attraction_distribution=True,
-        lin_log_mode=False,
-        prevent_overlapping=False,
-        edge_weight_influence=1.0,
-        jitter_tolerance=1.0,
-        barnes_hut_optimize=False,
-        barnes_hut_theta=0.5,
-        scaling_ratio=2.0,
-        strong_gravity_mode=False,
-        gravity=1.0,
-        callback=test_callback,
-    )
-
-    cu_pos = cu_pos.sort_values("0_vertex")
-    matrix_file = dataset_path.with_suffix(".mtx")
-    M = scipy.io.mmread(matrix_file)
-    M = cugraph.structure.graph_to_csr(G)
-    M = M.todense()
-    cu_trust = trustworthiness(M, cu_pos[["x", "y"]].to_pandas())
-    print(cu_trust, score)
-    assert cu_trust > score

From 0972bd4145704b59ffe575a564b48361ea3a8847 Mon Sep 17 00:00:00 2001
From: acostadon <dacosta@nvidia.com>
Date: Wed, 27 Sep 2023 15:59:17 -0400
Subject: [PATCH 3/6] removed dependency pointed out by flake8

---
 python/cugraph/cugraph/tests/layout/test_force_atlas2.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/python/cugraph/cugraph/tests/layout/test_force_atlas2.py b/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
index 025b5213f77..9e5a6a323ea 100644
--- a/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
+++ b/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
@@ -14,7 +14,6 @@
 import time
 import pytest
 
-import cudf
 import cugraph
 from cugraph.structure import number_map
 from cugraph.internals import GraphBasedDimRedCallback
@@ -191,4 +190,3 @@ def test_force_atlas2(graph_file, score, max_iter, barnes_hut_optimize):
     assert test_callback.on_epoch_end_called_count == max_iter
     # verify `on_train_end` was only called once
     assert test_callback.on_train_end_called_count == 1
-

From 5fcf90450744308bfee0846e98aad29930f871da Mon Sep 17 00:00:00 2001
From: acostadon <dacosta@nvidia.com>
Date: Thu, 28 Sep 2023 09:03:08 -0400
Subject: [PATCH 4/6] added download equals True

---
 python/cugraph/cugraph/tests/layout/test_force_atlas2.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cugraph/cugraph/tests/layout/test_force_atlas2.py b/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
index 9e5a6a323ea..87ace22fa1f 100644
--- a/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
+++ b/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
@@ -146,7 +146,7 @@ def on_train_end(self, positions):
 @pytest.mark.parametrize("max_iter", MAX_ITERATIONS)
 @pytest.mark.parametrize("barnes_hut_optimize", BARNES_HUT_OPTIMIZE)
 def test_force_atlas2(graph_file, score, max_iter, barnes_hut_optimize):
-    cu_M = graph_file.get_edgelist()
+    cu_M = graph_file.get_edgelist(download=True)
     test_callback = TestCallback()
     cu_pos = cugraph_call(
         cu_M,
@@ -177,10 +177,10 @@ def test_force_atlas2(graph_file, score, max_iter, barnes_hut_optimize):
     """
 
     if "string" in graph_file.metadata["col_types"]:
-        df = renumbered_edgelist(graph_file.get_edgelist())
+        df = renumbered_edgelist(graph_file.get_edgelist(download=True))
         M = get_coo_array(df)
     else:
-        M = get_coo_array(graph_file.get_edgelist())
+        M = get_coo_array(graph_file.get_edgelist(download=True))
     cu_trust = trustworthiness(M, cu_pos[["x", "y"]].to_pandas())
     print(cu_trust, score)
     assert cu_trust > score

From b3a3bdf9bc3a1433a05503282c1b6fc50db3a8e8 Mon Sep 17 00:00:00 2001
From: acostadon <dacosta@nvidia.com>
Date: Fri, 29 Sep 2023 07:52:27 -0400
Subject: [PATCH 5/6] added fixme and issue per review comments

---
 .../cugraph/cugraph/tests/layout/test_force_atlas2.py  | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/python/cugraph/cugraph/tests/layout/test_force_atlas2.py b/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
index 87ace22fa1f..6b1fd6bcc4e 100644
--- a/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
+++ b/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
@@ -27,15 +27,13 @@
     dining_prefs,
 )
 
-# Temporarily suppress warnings till networkX fixes deprecation warnings
-# (Using or importing the ABCs from 'collections' instead of from
-# 'collections.abc' is deprecated, and in 3.8 it will stop working) for
-# python 3.7.  Also, these import fa2 and import networkx need to be
-# relocated in the third-party group once this gets fixed.
-
+# FIXME Removed the multi column positional due to it being non-deterministic
+# need to replace this coverage. Issue 3890 in cuGraph repo was created.
 
 # This method renumbers a dataframe so it can be tested using Trustworthiness.
 # it converts a dataframe with string vertex ids to a renumbered int one.
+
+
 def renumbered_edgelist(df):
     renumbered_df, num_map = number_map.NumberMap.renumber(df, "src", "dst")
     new_df = renumbered_df[["renumbered_src", "renumbered_dst", "wgt"]]

From 481e71ee411bb40b87af8bad5ac3cfe00217eb0a Mon Sep 17 00:00:00 2001
From: acostadon <dacosta@nvidia.com>
Date: Fri, 29 Sep 2023 08:05:04 -0400
Subject: [PATCH 6/6] flake8 reformat

---
 python/cugraph/cugraph/tests/layout/test_force_atlas2.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/cugraph/cugraph/tests/layout/test_force_atlas2.py b/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
index ab95aeec4be..6b1fd6bcc4e 100644
--- a/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
+++ b/python/cugraph/cugraph/tests/layout/test_force_atlas2.py
@@ -26,6 +26,7 @@
     netscience,
     dining_prefs,
 )
+
 # FIXME Removed the multi column positional due to it being non-deterministic
 # need to replace this coverage. Issue 3890 in cuGraph repo was created.