Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Forward-merge branch-23.10 to branch-23.12 #3902

Merged
merged 1 commit into from
Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions python/cugraph/cugraph/layout/force_atlas2_wrapper.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,11 @@ def force_atlas2(input_graph,
if not input_graph.edgelist:
input_graph.view_edge_list()

# FIXME: This implementation assumes that the number of vertices
# is the max vertex ID + 1 which is not always the case.
num_verts = input_graph.nodes().max() + 1
# this code allows handling of renumbered graphs
if input_graph.is_renumbered():
num_verts = input_graph.renumber_map.df_internal_to_external['id'].max()+1
else:
num_verts = input_graph.nodes().max() + 1
num_edges = len(input_graph.edgelist.edgelist_df['src'])

cdef GraphCOOView[int,int,float] graph_float
Expand Down
145 changes: 62 additions & 83 deletions python/cugraph/cugraph/tests/layout/test_force_atlas2.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,46 @@

import time
import pytest
import scipy.io
from sklearn.manifold import trustworthiness

import cudf
import cugraph
from cugraph.structure import number_map
from cugraph.internals import GraphBasedDimRedCallback
from cugraph.datasets import karate, polbooks, dolphins, netscience
from sklearn.manifold import trustworthiness
import scipy.io
from cugraph.datasets import (
karate,
polbooks,
dolphins,
netscience,
dining_prefs,
)

# FIXME Removed the multi column positional due to it being non-deterministic
# need to replace this coverage. Issue 3890 in cuGraph repo was created.

# This method renumbers a dataframe so it can be tested using Trustworthiness.
# it converts a dataframe with string vertex ids to a renumbered int one.


def renumbered_edgelist(df):
renumbered_df, num_map = number_map.NumberMap.renumber(df, "src", "dst")
new_df = renumbered_df[["renumbered_src", "renumbered_dst", "wgt"]]
column_names = {"renumbered_src": "src", "renumbered_dst": "dst"}
new_df = new_df.rename(columns=column_names)
return new_df


# This method converts a dataframe to a sparce matrix that is required by
# scipy Trustworthiness to verify the layout
def get_coo_array(edgelist):
coo = edgelist
x = max(coo["src"].max(), coo["dst"].max()) + 1
row = coo["src"].to_numpy()
col = coo["dst"].to_numpy()
data = coo["wgt"].to_numpy()
M = scipy.sparse.coo_array((data, (row, col)), shape=(x, x))

return M


def cugraph_call(
Expand All @@ -37,11 +70,15 @@ def cugraph_call(
strong_gravity_mode,
gravity,
callback=None,
renumber=False,
):

G = cugraph.Graph()
if cu_M["src"] is not int or cu_M["dst"] is not int:
renumber = True
else:
renumber = False
G.from_cudf_edgelist(
cu_M, source="src", destination="dst", edge_attr="wgt", renumber=False
cu_M, source="src", destination="dst", edge_attr="wgt", renumber=renumber
)

t1 = time.time()
Expand All @@ -66,7 +103,19 @@ def cugraph_call(
return pos


DATASETS = [(karate, 0.70), (polbooks, 0.75), (dolphins, 0.66), (netscience, 0.66)]
DATASETS = [
(karate, 0.70),
(polbooks, 0.75),
(dolphins, 0.66),
(netscience, 0.66),
(dining_prefs, 0.50),
]

DATASETS2 = [
(polbooks, 0.75),
(dolphins, 0.66),
(netscience, 0.66),
]


MAX_ITERATIONS = [500]
Expand Down Expand Up @@ -95,8 +144,7 @@ def on_train_end(self, positions):
@pytest.mark.parametrize("max_iter", MAX_ITERATIONS)
@pytest.mark.parametrize("barnes_hut_optimize", BARNES_HUT_OPTIMIZE)
def test_force_atlas2(graph_file, score, max_iter, barnes_hut_optimize):
cu_M = graph_file.get_edgelist()
dataset_path = graph_file.get_path()
cu_M = graph_file.get_edgelist(download=True)
test_callback = TestCallback()
cu_pos = cugraph_call(
cu_M,
Expand Down Expand Up @@ -126,9 +174,11 @@ def test_force_atlas2(graph_file, score, max_iter, barnes_hut_optimize):
iterations on a given graph.
"""

matrix_file = dataset_path.with_suffix(".mtx")
M = scipy.io.mmread(matrix_file)
M = M.toarray()
if "string" in graph_file.metadata["col_types"]:
df = renumbered_edgelist(graph_file.get_edgelist(download=True))
M = get_coo_array(df)
else:
M = get_coo_array(graph_file.get_edgelist(download=True))
cu_trust = trustworthiness(M, cu_pos[["x", "y"]].to_pandas())
print(cu_trust, score)
assert cu_trust > score
Expand All @@ -138,74 +188,3 @@ def test_force_atlas2(graph_file, score, max_iter, barnes_hut_optimize):
assert test_callback.on_epoch_end_called_count == max_iter
# verify `on_train_end` was only called once
assert test_callback.on_train_end_called_count == 1


# FIXME: this test occasionally fails - skipping to prevent CI failures but
# need to revisit ASAP
@pytest.mark.sg
@pytest.mark.skip(reason="non-deterministric - needs fixing!")
@pytest.mark.parametrize("graph_file, score", DATASETS[:-1])
@pytest.mark.parametrize("max_iter", MAX_ITERATIONS)
@pytest.mark.parametrize("barnes_hut_optimize", BARNES_HUT_OPTIMIZE)
def test_force_atlas2_multi_column_pos_list(
graph_file, score, max_iter, barnes_hut_optimize
):
cu_M = graph_file.get_edgelist()
dataset_path = graph_file.get_path()
test_callback = TestCallback()
pos = cugraph_call(
cu_M,
max_iter=max_iter,
pos_list=None,
outbound_attraction_distribution=True,
lin_log_mode=False,
prevent_overlapping=False,
edge_weight_influence=1.0,
jitter_tolerance=1.0,
barnes_hut_optimize=False,
barnes_hut_theta=0.5,
scaling_ratio=2.0,
strong_gravity_mode=False,
gravity=1.0,
callback=test_callback,
)

cu_M.rename(columns={"0": "src_0", "1": "dst_0"}, inplace=True)
cu_M["src_1"] = cu_M["src_0"] + 1000
cu_M["dst_1"] = cu_M["dst_0"] + 1000

G = cugraph.Graph()
G.from_cudf_edgelist(
cu_M, source=["src_0", "src_1"], destination=["dst_0", "dst_1"], edge_attr="2"
)

pos_list = cudf.DataFrame()
pos_list["vertex_0"] = pos["vertex"]
pos_list["vertex_1"] = pos_list["vertex_0"] + 1000
pos_list["x"] = pos["x"]
pos_list["y"] = pos["y"]

cu_pos = cugraph.force_atlas2(
G,
max_iter=max_iter,
pos_list=pos_list,
outbound_attraction_distribution=True,
lin_log_mode=False,
prevent_overlapping=False,
edge_weight_influence=1.0,
jitter_tolerance=1.0,
barnes_hut_optimize=False,
barnes_hut_theta=0.5,
scaling_ratio=2.0,
strong_gravity_mode=False,
gravity=1.0,
callback=test_callback,
)

cu_pos = cu_pos.sort_values("0_vertex")
matrix_file = dataset_path.with_suffix(".mtx")
M = scipy.io.mmread(matrix_file)
M = M.todense()
cu_trust = trustworthiness(M, cu_pos[["x", "y"]].to_pandas())
print(cu_trust, score)
assert cu_trust > score
Loading