From f53bb56dc3245f64523aeeb997430c8f49de4624 Mon Sep 17 00:00:00 2001 From: Tingyu Wang Date: Fri, 22 Sep 2023 12:44:31 -0400 Subject: [PATCH] Fix torch seed in `cugraph-dgl` and `-pyg` tests for conv layers (#3869) Fixes https://github.com/rapidsai/graph_dl/issues/325 Recently, a few CI runs (ex. [1](https://github.com/rapidsai/cugraph/actions/runs/6254253684/job/16983164330?pr=3828#step:7:5078), [2](https://github.com/rapidsai/cugraph/actions/runs/6224345348/job/16896416094?pr=3843)) failed when comparing results from cugraph-ops-based conv layers against results from upstream frameworks. The tests pass most of the time, but occasionally fail due to a combination of using a strict tolerance and bad numerics (floating point error). This PR fixes the seed used for generating random feature tensors so that CI behaves consistently across different runs. Authors: - Tingyu Wang (https://github.com/tingyu66) Approvers: - Alex Barghi (https://github.com/alexbarghi-nv) URL: https://github.com/rapidsai/cugraph/pull/3869 --- python/cugraph-dgl/tests/nn/test_gatconv.py | 2 ++ python/cugraph-dgl/tests/nn/test_gatv2conv.py | 2 ++ python/cugraph-dgl/tests/nn/test_relgraphconv.py | 15 +++++++++++---- python/cugraph-dgl/tests/nn/test_sageconv.py | 1 + .../cugraph-dgl/tests/nn/test_transformerconv.py | 1 + .../cugraph_pyg/tests/nn/test_gat_conv.py | 1 + .../cugraph_pyg/tests/nn/test_gatv2_conv.py | 1 + .../cugraph_pyg/tests/nn/test_rgcn_conv.py | 1 + .../cugraph_pyg/tests/nn/test_sage_conv.py | 1 + .../cugraph_pyg/tests/nn/test_transformer_conv.py | 1 + 10 files changed, 22 insertions(+), 4 deletions(-) diff --git a/python/cugraph-dgl/tests/nn/test_gatconv.py b/python/cugraph-dgl/tests/nn/test_gatconv.py index ef3047dc2cd..ce145b2bc87 100644 --- a/python/cugraph-dgl/tests/nn/test_gatconv.py +++ b/python/cugraph-dgl/tests/nn/test_gatconv.py @@ -35,6 +35,7 @@ def test_gatconv_equality( ): from dgl.nn.pytorch import GATConv + torch.manual_seed(12345) g = create_graph1().to("cuda") if idtype_int: @@ -121,6 +122,7 @@ def test_gatconv_equality( def test_gatconv_edge_feats( bias, bipartite, concat, max_in_degree, num_heads, to_block, use_edge_feats ): + torch.manual_seed(12345) g = create_graph1().to("cuda") if to_block: diff --git a/python/cugraph-dgl/tests/nn/test_gatv2conv.py b/python/cugraph-dgl/tests/nn/test_gatv2conv.py index cc46a6e4b39..52003edacca 100644 --- a/python/cugraph-dgl/tests/nn/test_gatv2conv.py +++ b/python/cugraph-dgl/tests/nn/test_gatv2conv.py @@ -35,6 +35,7 @@ def test_gatv2conv_equality( ): from dgl.nn.pytorch import GATv2Conv + torch.manual_seed(12345) g = create_graph1().to("cuda") if idtype_int: @@ -109,6 +110,7 @@ def test_gatv2conv_equality( def test_gatv2conv_edge_feats( bias, bipartite, concat, max_in_degree, num_heads, to_block, use_edge_feats ): + torch.manual_seed(12345) g = create_graph1().to("cuda") if to_block: diff --git a/python/cugraph-dgl/tests/nn/test_relgraphconv.py b/python/cugraph-dgl/tests/nn/test_relgraphconv.py index 901f9ba1433..bdaa89e57f2 100644 --- a/python/cugraph-dgl/tests/nn/test_relgraphconv.py +++ b/python/cugraph-dgl/tests/nn/test_relgraphconv.py @@ -41,6 +41,7 @@ def test_relgraphconv_equality( ): from dgl.nn.pytorch import RelGraphConv + torch.manual_seed(12345) in_feat, out_feat, num_rels = 10, 2, 3 args = (in_feat, out_feat, num_rels) kwargs = { @@ -75,12 +76,18 @@ def test_relgraphconv_equality( size=size, src_ids=indices, cdst_ids=offsets, values=etypes, formats="csc" ) - torch.manual_seed(0) conv1 = RelGraphConv(*args, **kwargs).cuda() + conv2 = CuGraphRelGraphConv(*args, **kwargs, apply_norm=False).cuda() - torch.manual_seed(0) - kwargs["apply_norm"] = False - conv2 = CuGraphRelGraphConv(*args, **kwargs).cuda() + with torch.no_grad(): + if self_loop: + conv2.W.data[:-1] = conv1.linear_r.W.data + conv2.W.data[-1] = conv1.loop_weight.data + else: + conv2.W.data = conv1.linear_r.W.data.detach().clone() + + if regularizer is not None: + conv2.coeff.data = conv1.linear_r.coeff.data.detach().clone() out1 = conv1(g, feat, g.edata[dgl.ETYPE]) diff --git a/python/cugraph-dgl/tests/nn/test_sageconv.py b/python/cugraph-dgl/tests/nn/test_sageconv.py index e2acf9e6596..b5d0a44b868 100644 --- a/python/cugraph-dgl/tests/nn/test_sageconv.py +++ b/python/cugraph-dgl/tests/nn/test_sageconv.py @@ -35,6 +35,7 @@ def test_sageconv_equality( ): from dgl.nn.pytorch import SAGEConv + torch.manual_seed(12345) kwargs = {"aggregator_type": aggr, "bias": bias} g = create_graph1().to("cuda") diff --git a/python/cugraph-dgl/tests/nn/test_transformerconv.py b/python/cugraph-dgl/tests/nn/test_transformerconv.py index b2b69cb35ab..5ac4fd7bea7 100644 --- a/python/cugraph-dgl/tests/nn/test_transformerconv.py +++ b/python/cugraph-dgl/tests/nn/test_transformerconv.py @@ -41,6 +41,7 @@ def test_transformerconv( use_edge_feats, sparse_format, ): + torch.manual_seed(12345) device = "cuda" g = create_graph1().to(device) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py index 21c43bad38c..62bebb9211d 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gat_conv.py @@ -32,6 +32,7 @@ def test_gat_conv_equality( import torch from torch_geometric.nn import GATConv + torch.manual_seed(12345) edge_index, size = request.getfixturevalue(graph) edge_index = edge_index.cuda() diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py index 6b11e87154a..a4794628410 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_gatv2_conv.py @@ -28,6 +28,7 @@ def test_gatv2_conv_equality(bipartite, concat, heads, use_edge_attr, graph, req import torch from torch_geometric.nn import GATv2Conv + torch.manual_seed(12345) edge_index, size = request.getfixturevalue(graph) edge_index = edge_index.cuda() diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_rgcn_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_rgcn_conv.py index 233c6aa2836..ded4f300c0c 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_rgcn_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_rgcn_conv.py @@ -31,6 +31,7 @@ def test_rgcn_conv_equality( import torch from torch_geometric.nn import FastRGCNConv as RGCNConv + torch.manual_seed(12345) in_channels, out_channels, num_relations = (4, 2, 3) kwargs = dict(aggr=aggr, bias=bias, num_bases=num_bases, root_weight=root_weight) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_sage_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_sage_conv.py index 7f73cddbdbb..b2977d1d175 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_sage_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_sage_conv.py @@ -32,6 +32,7 @@ def test_sage_conv_equality( import torch from torch_geometric.nn import SAGEConv + torch.manual_seed(12345) edge_index, size = request.getfixturevalue(graph) edge_index = edge_index.cuda() csc = CuGraphSAGEConv.to_csc(edge_index, size) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py index 7dba1a6d515..fbdb244898b 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_transformer_conv.py @@ -27,6 +27,7 @@ def test_transformer_conv_equality(bipartite, concat, heads, graph, request): import torch from torch_geometric.nn import TransformerConv + torch.manual_seed(12345) edge_index, size = request.getfixturevalue(graph) edge_index = edge_index.cuda() csc = CuGraphTransformerConv.to_csc(edge_index, size)