diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py index 8843e61ad89..cc4ce474f2d 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,7 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Optional, Tuple, Union +from typing import Optional, Union from cugraph_dgl.nn.conv.base import BaseConv, SparseGraph from cugraph.utilities.utils import import_optional @@ -29,7 +29,7 @@ class GATConv(BaseConv): Parameters ---------- - in_feats : int or tuple + in_feats : int or (int, int) Input feature size. A pair denotes feature sizes of source and destination nodes. out_feats : int @@ -92,7 +92,7 @@ class GATConv(BaseConv): def __init__( self, - in_feats: Union[int, Tuple[int, int]], + in_feats: Union[int, tuple[int, int]], out_feats: int, num_heads: int, feat_drop: float = 0.0, @@ -104,14 +104,19 @@ def __init__( bias: bool = True, ): super().__init__() + + if isinstance(in_feats, int): + self.in_feats_src = self.in_feats_dst = in_feats + else: + self.in_feats_src, self.in_feats_dst = in_feats self.in_feats = in_feats self.out_feats = out_feats - self.in_feats_src, self.in_feats_dst = dgl.utils.expand_as_pair(in_feats) self.num_heads = num_heads self.feat_drop = nn.Dropout(feat_drop) self.concat = concat self.edge_feats = edge_feats self.negative_slope = negative_slope + self.residual = residual self.allow_zero_in_degree = allow_zero_in_degree if isinstance(in_feats, int): @@ -126,28 +131,34 @@ def __init__( if edge_feats is not None: self.lin_edge = nn.Linear(edge_feats, num_heads * out_feats, bias=False) - self.attn_weights = nn.Parameter(torch.Tensor(3 * num_heads * out_feats)) + self.attn_weights = nn.Parameter(torch.empty(3 * num_heads * out_feats)) else: self.register_parameter("lin_edge", None) - self.attn_weights = nn.Parameter(torch.Tensor(2 * num_heads * out_feats)) + self.attn_weights = nn.Parameter(torch.empty(2 * num_heads * out_feats)) - if bias and concat: - self.bias = nn.Parameter(torch.Tensor(num_heads, out_feats)) - elif bias and not concat: - self.bias = nn.Parameter(torch.Tensor(out_feats)) + out_dim = num_heads * out_feats if concat else out_feats + if residual: + if self.in_feats_dst != out_dim: + self.lin_res = nn.Linear(self.in_feats_dst, out_dim, bias=bias) + else: + self.lin_res = nn.Identity() else: - self.register_buffer("bias", None) + self.register_buffer("lin_res", None) - self.residual = residual and self.in_feats_dst != out_feats * num_heads - if self.residual: - self.lin_res = nn.Linear( - self.in_feats_dst, num_heads * out_feats, bias=bias - ) + if bias and not isinstance(self.lin_res, nn.Linear): + if concat: + self.bias = nn.Parameter(torch.empty(num_heads, out_feats)) + else: + self.bias = nn.Parameter(torch.empty(out_feats)) else: - self.register_buffer("lin_res", None) + self.register_buffer("bias", None) self.reset_parameters() + def set_allow_zero_in_degree(self, set_value): + r"""Set allow_zero_in_degree flag.""" + self.allow_zero_in_degree = set_value + def reset_parameters(self): r"""Reinitialize learnable parameters.""" gain = nn.init.calculate_gain("relu") @@ 
-172,7 +183,7 @@ def reset_parameters(self): def forward( self, g: Union[SparseGraph, dgl.DGLHeteroGraph], - nfeat: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], + nfeat: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]], efeat: Optional[torch.Tensor] = None, max_in_degree: Optional[int] = None, ) -> torch.Tensor: @@ -182,8 +193,10 @@ def forward( ---------- graph : DGLGraph or SparseGraph The graph. - nfeat : torch.Tensor - Input features of shape :math:`(N, D_{in})`. + nfeat : torch.Tensor or (torch.Tensor, torch.Tensor) + Node features. If given as a tuple, the two elements correspond to + the source and destination node features, respectively, in a + bipartite graph. efeat: torch.Tensor, optional Optional edge features. max_in_degree : int @@ -237,18 +250,17 @@ def forward( if bipartite: if not hasattr(self, "lin_src"): - raise RuntimeError( - f"{self.__class__.__name__}.in_feats must be a pair of " - f"integers to allow bipartite node features, but got " - f"{self.in_feats}." - ) - nfeat_src = self.lin_src(nfeat[0]) - nfeat_dst = self.lin_dst(nfeat[1]) + nfeat_src = self.lin(nfeat[0]) + nfeat_dst = self.lin(nfeat[1]) + else: + nfeat_src = self.lin_src(nfeat[0]) + nfeat_dst = self.lin_dst(nfeat[1]) else: if not hasattr(self, "lin"): raise RuntimeError( f"{self.__class__.__name__}.in_feats is expected to be an " - f"integer, but got {self.in_feats}." + f"integer when the graph is not bipartite, " + f"but got {self.in_feats}." ) nfeat = self.lin(nfeat) diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatv2conv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/gatv2conv.py index 209a5fe1a8d..6c78b4df0b8 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatv2conv.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/gatv2conv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,7 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Optional, Tuple, Union +from typing import Optional, Union from cugraph_dgl.nn.conv.base import BaseConv, SparseGraph from cugraph.utilities.utils import import_optional @@ -29,14 +29,11 @@ class GATv2Conv(BaseConv): Parameters ---------- - in_feats : int, or pair of ints - Input feature size; i.e, the number of dimensions of :math:`h_i^{(l)}`. - If the layer is to be applied to a unidirectional bipartite graph, `in_feats` - specifies the input feature size on both the source and destination nodes. - If a scalar is given, the source and destination node feature size - would take the same value. + in_feats : int or (int, int) + Input feature size. A pair denotes feature sizes of source and + destination nodes. out_feats : int - Output feature size; i.e, the number of dimensions of :math:`h_i^{(l+1)}`. + Output feature size. num_heads : int Number of heads in Multi-Head Attention. feat_drop : float, optional @@ -58,17 +55,15 @@ class GATv2Conv(BaseConv): input graph. By setting ``True``, it will suppress the check and let the users handle it by themselves. Defaults: ``False``. bias : bool, optional - If set to :obj:`False`, the layer will not learn - an additive bias. (default: :obj:`True`) + If True, learns a bias term. Defaults: ``True``. 
share_weights : bool, optional - If set to :obj:`True`, the same matrix for :math:`W_{left}` and - :math:`W_{right}` in the above equations, will be applied to the source - and the target node of every edge. (default: :obj:`False`) + If ``True``, the same matrix will be applied to the source and the + destination node features. Defaults: ``False``. """ def __init__( self, - in_feats: Union[int, Tuple[int, int]], + in_feats: Union[int, tuple[int, int]], out_feats: int, num_heads: int, feat_drop: float = 0.0, @@ -81,16 +76,22 @@ def __init__( share_weights: bool = False, ): super().__init__() + + if isinstance(in_feats, int): + self.in_feats_src = self.in_feats_dst = in_feats + else: + self.in_feats_src, self.in_feats_dst = in_feats self.in_feats = in_feats self.out_feats = out_feats - self.in_feats_src, self.in_feats_dst = dgl.utils.expand_as_pair(in_feats) self.num_heads = num_heads self.feat_drop = nn.Dropout(feat_drop) self.concat = concat self.edge_feats = edge_feats self.negative_slope = negative_slope + self.residual = residual self.allow_zero_in_degree = allow_zero_in_degree self.share_weights = share_weights + self.bias = bias self.lin_src = nn.Linear(self.in_feats_src, num_heads * out_feats, bias=bias) if share_weights: @@ -106,30 +107,28 @@ def __init__( self.in_feats_dst, num_heads * out_feats, bias=bias ) - self.attn = nn.Parameter(torch.Tensor(num_heads * out_feats)) + self.attn_weights = nn.Parameter(torch.empty(num_heads * out_feats)) if edge_feats is not None: self.lin_edge = nn.Linear(edge_feats, num_heads * out_feats, bias=False) else: self.register_parameter("lin_edge", None) - if bias and concat: - self.bias = nn.Parameter(torch.Tensor(num_heads, out_feats)) - elif bias and not concat: - self.bias = nn.Parameter(torch.Tensor(out_feats)) - else: - self.register_buffer("bias", None) - - self.residual = residual and self.in_feats_dst != out_feats * num_heads - if self.residual: - self.lin_res = nn.Linear( - self.in_feats_dst, num_heads * out_feats, bias=bias - ) + out_dim = num_heads * out_feats if concat else out_feats + if residual: + if self.in_feats_dst != out_dim: + self.lin_res = nn.Linear(self.in_feats_dst, out_dim, bias=bias) + else: + self.lin_res = nn.Identity() else: self.register_buffer("lin_res", None) self.reset_parameters() + def set_allow_zero_in_degree(self, set_value): + r"""Set allow_zero_in_degree flag.""" + self.allow_zero_in_degree = set_value + def reset_parameters(self): r"""Reinitialize learnable parameters.""" gain = nn.init.calculate_gain("relu") @@ -137,7 +136,7 @@ def reset_parameters(self): nn.init.xavier_normal_(self.lin_dst.weight, gain=gain) nn.init.xavier_normal_( - self.attn.view(-1, self.num_heads, self.out_feats), gain=gain + self.attn_weights.view(-1, self.num_heads, self.out_feats), gain=gain ) if self.lin_edge is not None: self.lin_edge.reset_parameters() @@ -145,13 +144,10 @@ def reset_parameters(self): if self.lin_res is not None: self.lin_res.reset_parameters() - if self.bias is not None: - nn.init.zeros_(self.bias) - def forward( self, g: Union[SparseGraph, dgl.DGLHeteroGraph], - nfeat: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], + nfeat: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]], efeat: Optional[torch.Tensor] = None, max_in_degree: Optional[int] = None, ) -> torch.Tensor: @@ -225,7 +221,7 @@ def forward( out = ops_torch.operators.mha_gat_v2_n2n( nfeat, - self.attn, + self.attn_weights, _graph, num_heads=self.num_heads, activation="LeakyReLU", @@ -243,7 +239,4 @@ def forward( res = res.mean(dim=1) out = 
out + res - if self.bias is not None: - out = out + self.bias - return out diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/relgraphconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/relgraphconv.py index 54916674210..5c4b5dea441 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/relgraphconv.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/relgraphconv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -100,16 +100,16 @@ def __init__( self.self_loop = self_loop if regularizer is None: self.W = nn.Parameter( - torch.Tensor(num_rels + dim_self_loop, in_feats, out_feats) + torch.empty(num_rels + dim_self_loop, in_feats, out_feats) ) self.coeff = None elif regularizer == "basis": if num_bases is None: raise ValueError('Missing "num_bases" for basis regularization.') self.W = nn.Parameter( - torch.Tensor(num_bases + dim_self_loop, in_feats, out_feats) + torch.empty(num_bases + dim_self_loop, in_feats, out_feats) ) - self.coeff = nn.Parameter(torch.Tensor(num_rels, num_bases)) + self.coeff = nn.Parameter(torch.empty(num_rels, num_bases)) self.num_bases = num_bases else: raise ValueError( @@ -119,7 +119,7 @@ def __init__( self.regularizer = regularizer if bias: - self.bias = nn.Parameter(torch.Tensor(out_feats)) + self.bias = nn.Parameter(torch.empty(out_feats)) else: self.register_parameter("bias", None) diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/sageconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/sageconv.py index a3f946d7cb4..b6198903766 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/sageconv.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/sageconv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,7 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Optional, Tuple, Union +from typing import Optional, Union from cugraph_dgl.nn.conv.base import BaseConv, SparseGraph from cugraph.utilities.utils import import_optional @@ -65,7 +65,7 @@ class SAGEConv(BaseConv): def __init__( self, - in_feats: Union[int, Tuple[int, int]], + in_feats: Union[int, tuple[int, int]], out_feats: int, aggregator_type: str = "mean", feat_drop: float = 0.0, @@ -111,7 +111,7 @@ def reset_parameters(self): def forward( self, g: Union[SparseGraph, dgl.DGLHeteroGraph], - feat: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], + feat: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]], max_in_degree: Optional[int] = None, ) -> torch.Tensor: r"""Forward computation. diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py index 8481b9ee265..e77556fb76f 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -11,7 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Optional, Tuple, Union +from typing import Optional, Union from cugraph_dgl.nn.conv.base import BaseConv, SparseGraph from cugraph.utilities.utils import import_optional @@ -51,7 +51,7 @@ class TransformerConv(BaseConv): def __init__( self, - in_node_feats: Union[int, Tuple[int, int]], + in_node_feats: Union[int, tuple[int, int]], out_node_feats: int, num_heads: int, concat: bool = True, @@ -116,7 +116,7 @@ def reset_parameters(self): def forward( self, g: Union[SparseGraph, dgl.DGLHeteroGraph], - nfeat: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], + nfeat: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]], efeat: Optional[torch.Tensor] = None, ) -> torch.Tensor: """Forward computation. diff --git a/python/cugraph-dgl/tests/conftest.py b/python/cugraph-dgl/tests/conftest.py index a3863ed81fa..ee1183f5cd1 100644 --- a/python/cugraph-dgl/tests/conftest.py +++ b/python/cugraph-dgl/tests/conftest.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -13,6 +13,7 @@ import pytest +import dgl import torch from cugraph.testing.mg_utils import ( @@ -58,3 +59,10 @@ class SparseGraphData1: @pytest.fixture def sparse_graph_1(): return SparseGraphData1() + + +@pytest.fixture +def dgl_graph_1(): + src = torch.tensor([0, 1, 0, 2, 3, 0, 4, 0, 5, 0, 6, 7, 0, 8, 9]) + dst = torch.tensor([1, 9, 2, 9, 9, 4, 9, 5, 9, 6, 9, 9, 8, 9, 0]) + return dgl.graph((src, dst)) diff --git a/python/cugraph-dgl/tests/nn/__init__.py b/python/cugraph-dgl/tests/nn/__init__.py deleted file mode 100644 index a1dd01f33d4..00000000000 --- a/python/cugraph-dgl/tests/nn/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/python/cugraph-dgl/tests/nn/common.py b/python/cugraph-dgl/tests/nn/common.py deleted file mode 100644 index 34787d20c9a..00000000000 --- a/python/cugraph-dgl/tests/nn/common.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from cugraph.utilities.utils import import_optional - -th = import_optional("torch") -dgl = import_optional("dgl") - - -def create_graph1(): - u = th.tensor([0, 1, 0, 2, 3, 0, 4, 0, 5, 0, 6, 7, 0, 8, 9]) - v = th.tensor([1, 9, 2, 9, 9, 4, 9, 5, 9, 6, 9, 9, 8, 9, 0]) - g = dgl.graph((u, v)) - return g diff --git a/python/cugraph-dgl/tests/nn/test_gatconv.py b/python/cugraph-dgl/tests/nn/test_gatconv.py index ce145b2bc87..de27efc6329 100644 --- a/python/cugraph-dgl/tests/nn/test_gatconv.py +++ b/python/cugraph-dgl/tests/nn/test_gatconv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -15,7 +15,6 @@ from cugraph_dgl.nn.conv.base import SparseGraph from cugraph_dgl.nn import GATConv as CuGraphGATConv -from .common import create_graph1 dgl = pytest.importorskip("dgl", reason="DGL not available") torch = pytest.importorskip("torch", reason="PyTorch not available") @@ -23,37 +22,49 @@ ATOL = 1e-6 -@pytest.mark.parametrize("bipartite", [False, True]) -@pytest.mark.parametrize("idtype_int", [False, True]) +@pytest.mark.parametrize("mode", ["bipartite", "share_weights", "regular"]) +@pytest.mark.parametrize("idx_type", [torch.int32, torch.int64]) @pytest.mark.parametrize("max_in_degree", [None, 8]) @pytest.mark.parametrize("num_heads", [1, 2, 7]) @pytest.mark.parametrize("residual", [False, True]) @pytest.mark.parametrize("to_block", [False, True]) @pytest.mark.parametrize("sparse_format", ["coo", "csc", None]) def test_gatconv_equality( - bipartite, idtype_int, max_in_degree, num_heads, residual, to_block, sparse_format + dgl_graph_1, + mode, + idx_type, + max_in_degree, + num_heads, + residual, + to_block, + sparse_format, ): from dgl.nn.pytorch import GATConv torch.manual_seed(12345) - g = create_graph1().to("cuda") + device = torch.device("cuda") + g = dgl_graph_1.to(device).astype(idx_type) - if idtype_int: - g = g.int() if to_block: g = dgl.to_block(g) size = (g.num_src_nodes(), g.num_dst_nodes()) - if bipartite: + if mode == "bipartite": in_feats = (10, 3) nfeat = ( - torch.rand(g.num_src_nodes(), in_feats[0]).cuda(), - torch.rand(g.num_dst_nodes(), in_feats[1]).cuda(), + torch.randn(size[0], in_feats[0]).to(device), + torch.randn(size[1], in_feats[1]).to(device), + ) + elif mode == "share_weights": + in_feats = 5 + nfeat = ( + torch.randn(size[0], in_feats).to(device), + torch.randn(size[1], in_feats).to(device), ) else: - in_feats = 10 - nfeat = torch.rand(g.num_src_nodes(), in_feats).cuda() + in_feats = 7 + nfeat = torch.randn(size[0], in_feats).to(device) out_feats = 2 if sparse_format == "coo": @@ -65,24 +76,24 @@ def test_gatconv_equality( sg = SparseGraph(size=size, src_ids=indices, cdst_ids=offsets, formats="csc") args = (in_feats, out_feats, num_heads) - kwargs = {"bias": False, "allow_zero_in_degree": True} + kwargs = {"bias": False, "allow_zero_in_degree": True, "residual": residual} - conv1 = GATConv(*args, **kwargs).cuda() - out1 = conv1(g, nfeat) + conv1 = GATConv(*args, **kwargs).to(device) + conv2 = CuGraphGATConv(*args, **kwargs).to(device) - conv2 = CuGraphGATConv(*args, **kwargs).cuda() dim = num_heads * out_feats with torch.no_grad(): - conv2.attn_weights.data[:dim] = conv1.attn_l.data.flatten() - conv2.attn_weights.data[dim:] = conv1.attn_r.data.flatten() - if bipartite: - conv2.lin_src.weight.data = 
conv1.fc_src.weight.data.detach().clone() - conv2.lin_dst.weight.data = conv1.fc_dst.weight.data.detach().clone() + conv2.attn_weights[:dim].copy_(conv1.attn_l.flatten()) + conv2.attn_weights[dim:].copy_(conv1.attn_r.flatten()) + if mode == "bipartite": + conv2.lin_src.weight.copy_(conv1.fc_src.weight) + conv2.lin_dst.weight.copy_(conv1.fc_dst.weight) else: - conv2.lin.weight.data = conv1.fc.weight.data.detach().clone() - if residual and conv2.residual: - conv2.lin_res.weight.data = conv1.fc_res.weight.data.detach().clone() + conv2.lin.weight.copy_(conv1.fc.weight) + if residual and conv1.has_linear_res: + conv2.lin_res.weight.copy_(conv1.res_fc.weight) + out1 = conv1(g, nfeat) if sparse_format is not None: out2 = conv2(sg, nfeat, max_in_degree=max_in_degree) else: @@ -90,12 +101,12 @@ def test_gatconv_equality( assert torch.allclose(out1, out2, atol=ATOL) - grad_out1 = torch.rand_like(out1) - grad_out2 = grad_out1.clone().detach() + grad_out1 = torch.randn_like(out1) + grad_out2 = grad_out1.detach().clone() out1.backward(grad_out1) out2.backward(grad_out2) - if bipartite: + if mode == "bipartite": assert torch.allclose( conv1.fc_src.weight.grad, conv2.lin_src.weight.grad, atol=ATOL ) @@ -105,25 +116,38 @@ def test_gatconv_equality( else: assert torch.allclose(conv1.fc.weight.grad, conv2.lin.weight.grad, atol=ATOL) + if residual and conv1.has_linear_res: + assert torch.allclose( + conv1.res_fc.weight.grad, conv2.lin_res.weight.grad, atol=ATOL + ) + assert torch.allclose( torch.cat((conv1.attn_l.grad, conv1.attn_r.grad), dim=0), conv2.attn_weights.grad.view(2, num_heads, out_feats), - atol=ATOL, + atol=1e-5, # Note: using a loosened tolerance here due to numerical error ) @pytest.mark.parametrize("bias", [False, True]) @pytest.mark.parametrize("bipartite", [False, True]) @pytest.mark.parametrize("concat", [False, True]) -@pytest.mark.parametrize("max_in_degree", [None, 8, 800]) +@pytest.mark.parametrize("max_in_degree", [None, 8]) @pytest.mark.parametrize("num_heads", [1, 2, 7]) @pytest.mark.parametrize("to_block", [False, True]) @pytest.mark.parametrize("use_edge_feats", [False, True]) def test_gatconv_edge_feats( - bias, bipartite, concat, max_in_degree, num_heads, to_block, use_edge_feats + dgl_graph_1, + bias, + bipartite, + concat, + max_in_degree, + num_heads, + to_block, + use_edge_feats, ): torch.manual_seed(12345) - g = create_graph1().to("cuda") + device = torch.device("cuda") + g = dgl_graph_1.to(device) if to_block: g = dgl.to_block(g) @@ -131,17 +155,17 @@ def test_gatconv_edge_feats( if bipartite: in_feats = (10, 3) nfeat = ( - torch.rand(g.num_src_nodes(), in_feats[0]).cuda(), - torch.rand(g.num_dst_nodes(), in_feats[1]).cuda(), + torch.rand(g.num_src_nodes(), in_feats[0]).to(device), + torch.rand(g.num_dst_nodes(), in_feats[1]).to(device), ) else: in_feats = 10 - nfeat = torch.rand(g.num_src_nodes(), in_feats).cuda() + nfeat = torch.rand(g.num_src_nodes(), in_feats).to(device) out_feats = 2 if use_edge_feats: edge_feats = 3 - efeat = torch.rand(g.num_edges(), edge_feats).cuda() + efeat = torch.rand(g.num_edges(), edge_feats).to(device) else: edge_feats = None efeat = None @@ -154,8 +178,8 @@ def test_gatconv_edge_feats( edge_feats=edge_feats, bias=bias, allow_zero_in_degree=True, - ).cuda() + ).to(device) out = conv(g, nfeat, efeat=efeat, max_in_degree=max_in_degree) - grad_out = torch.rand_like(out) + grad_out = torch.randn_like(out) out.backward(grad_out) diff --git a/python/cugraph-dgl/tests/nn/test_gatv2conv.py b/python/cugraph-dgl/tests/nn/test_gatv2conv.py index 
52003edacca..2d26b7fdc28 100644 --- a/python/cugraph-dgl/tests/nn/test_gatv2conv.py +++ b/python/cugraph-dgl/tests/nn/test_gatv2conv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -15,45 +15,56 @@ from cugraph_dgl.nn.conv.base import SparseGraph from cugraph_dgl.nn import GATv2Conv as CuGraphGATv2Conv -from .common import create_graph1 dgl = pytest.importorskip("dgl", reason="DGL not available") torch = pytest.importorskip("torch", reason="PyTorch not available") -ATOL = 1e-6 +ATOL = 1e-5 -@pytest.mark.parametrize("bipartite", [False, True]) -@pytest.mark.parametrize("idtype_int", [False, True]) +@pytest.mark.parametrize("mode", ["bipartite", "share_weights", "regular"]) +@pytest.mark.parametrize("idx_type", [torch.int32, torch.int64]) @pytest.mark.parametrize("max_in_degree", [None, 8]) @pytest.mark.parametrize("num_heads", [1, 2, 7]) @pytest.mark.parametrize("residual", [False, True]) @pytest.mark.parametrize("to_block", [False, True]) @pytest.mark.parametrize("sparse_format", ["coo", "csc", None]) def test_gatv2conv_equality( - bipartite, idtype_int, max_in_degree, num_heads, residual, to_block, sparse_format + dgl_graph_1, + mode, + idx_type, + max_in_degree, + num_heads, + residual, + to_block, + sparse_format, ): from dgl.nn.pytorch import GATv2Conv torch.manual_seed(12345) - g = create_graph1().to("cuda") + device = torch.device("cuda") + g = dgl_graph_1.to(device).astype(idx_type) - if idtype_int: - g = g.int() if to_block: g = dgl.to_block(g) size = (g.num_src_nodes(), g.num_dst_nodes()) - if bipartite: + if mode == "bipartite": in_feats = (10, 3) nfeat = ( - torch.rand(g.num_src_nodes(), in_feats[0]).cuda(), - torch.rand(g.num_dst_nodes(), in_feats[1]).cuda(), + torch.randn(size[0], in_feats[0]).to(device), + torch.randn(size[1], in_feats[1]).to(device), + ) + elif mode == "share_weights": + in_feats = 5 + nfeat = ( + torch.randn(size[0], in_feats).to(device), + torch.randn(size[1], in_feats).to(device), ) else: - in_feats = 10 - nfeat = torch.rand(g.num_src_nodes(), in_feats).cuda() + in_feats = 7 + nfeat = torch.randn(size[0], in_feats).to(device) out_feats = 2 if sparse_format == "coo": @@ -65,19 +76,24 @@ def test_gatv2conv_equality( sg = SparseGraph(size=size, src_ids=indices, cdst_ids=offsets, formats="csc") args = (in_feats, out_feats, num_heads) - kwargs = {"bias": False, "allow_zero_in_degree": True} + kwargs = { + "bias": False, + "allow_zero_in_degree": True, + "residual": residual, + "share_weights": mode == "share_weights", + } - conv1 = GATv2Conv(*args, **kwargs).cuda() - out1 = conv1(g, nfeat) + conv1 = GATv2Conv(*args, **kwargs).to(device) + conv2 = CuGraphGATv2Conv(*args, **kwargs).to(device) - conv2 = CuGraphGATv2Conv(*args, **kwargs).cuda() with torch.no_grad(): - conv2.attn.data = conv1.attn.data.flatten() - conv2.lin_src.weight.data = conv1.fc_src.weight.data.detach().clone() - conv2.lin_dst.weight.data = conv1.fc_dst.weight.data.detach().clone() - if residual and conv2.residual: - conv2.lin_res.weight.data = conv1.fc_res.weight.data.detach().clone() + conv2.attn_weights.copy_(conv1.attn.flatten()) + conv2.lin_src.weight.copy_(conv1.fc_src.weight) + conv2.lin_dst.weight.copy_(conv1.fc_dst.weight) + if residual: + conv2.lin_res.weight.copy_(conv1.res_fc.weight) + out1 = conv1(g, nfeat) if sparse_format is not None: out2 = 
conv2(sg, nfeat, max_in_degree=max_in_degree) else: @@ -85,8 +101,8 @@ def test_gatv2conv_equality( assert torch.allclose(out1, out2, atol=ATOL) - grad_out1 = torch.rand_like(out1) - grad_out2 = grad_out1.clone().detach() + grad_out1 = torch.randn_like(out1) + grad_out2 = grad_out1.detach().clone() out1.backward(grad_out1) out2.backward(grad_out2) @@ -97,21 +113,38 @@ def test_gatv2conv_equality( conv1.fc_dst.weight.grad, conv2.lin_dst.weight.grad, atol=ATOL ) - assert torch.allclose(conv1.attn.grad, conv1.attn.grad, atol=ATOL) + if residual: + assert torch.allclose( + conv1.res_fc.weight.grad, conv2.lin_res.weight.grad, atol=ATOL + ) + + assert torch.allclose( + conv1.attn.grad, + conv2.attn_weights.grad.view(1, num_heads, out_feats), + atol=ATOL, + ) @pytest.mark.parametrize("bias", [False, True]) @pytest.mark.parametrize("bipartite", [False, True]) @pytest.mark.parametrize("concat", [False, True]) -@pytest.mark.parametrize("max_in_degree", [None, 8, 800]) +@pytest.mark.parametrize("max_in_degree", [None, 8]) @pytest.mark.parametrize("num_heads", [1, 2, 7]) @pytest.mark.parametrize("to_block", [False, True]) @pytest.mark.parametrize("use_edge_feats", [False, True]) def test_gatv2conv_edge_feats( - bias, bipartite, concat, max_in_degree, num_heads, to_block, use_edge_feats + dgl_graph_1, + bias, + bipartite, + concat, + max_in_degree, + num_heads, + to_block, + use_edge_feats, ): torch.manual_seed(12345) - g = create_graph1().to("cuda") + device = torch.device("cuda") + g = dgl_graph_1.to(device) if to_block: g = dgl.to_block(g) @@ -119,17 +152,17 @@ def test_gatv2conv_edge_feats( if bipartite: in_feats = (10, 3) nfeat = ( - torch.rand(g.num_src_nodes(), in_feats[0]).cuda(), - torch.rand(g.num_dst_nodes(), in_feats[1]).cuda(), + torch.rand(g.num_src_nodes(), in_feats[0]).to(device), + torch.rand(g.num_dst_nodes(), in_feats[1]).to(device), ) else: in_feats = 10 - nfeat = torch.rand(g.num_src_nodes(), in_feats).cuda() + nfeat = torch.rand(g.num_src_nodes(), in_feats).to(device) out_feats = 2 if use_edge_feats: edge_feats = 3 - efeat = torch.rand(g.num_edges(), edge_feats).cuda() + efeat = torch.rand(g.num_edges(), edge_feats).to(device) else: edge_feats = None efeat = None @@ -142,8 +175,8 @@ def test_gatv2conv_edge_feats( edge_feats=edge_feats, bias=bias, allow_zero_in_degree=True, - ).cuda() + ).to(device) out = conv(g, nfeat, efeat=efeat, max_in_degree=max_in_degree) - grad_out = torch.rand_like(out) + grad_out = torch.randn_like(out) out.backward(grad_out) diff --git a/python/cugraph-dgl/tests/nn/test_relgraphconv.py b/python/cugraph-dgl/tests/nn/test_relgraphconv.py index bdaa89e57f2..b5d3686c609 100644 --- a/python/cugraph-dgl/tests/nn/test_relgraphconv.py +++ b/python/cugraph-dgl/tests/nn/test_relgraphconv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -15,7 +15,6 @@ from cugraph_dgl.nn.conv.base import SparseGraph from cugraph_dgl.nn import RelGraphConv as CuGraphRelGraphConv -from .common import create_graph1 dgl = pytest.importorskip("dgl", reason="DGL not available") torch = pytest.importorskip("torch", reason="PyTorch not available") @@ -23,7 +22,7 @@ ATOL = 1e-6 -@pytest.mark.parametrize("idtype_int", [False, True]) +@pytest.mark.parametrize("idx_type", [torch.int32, torch.int64]) @pytest.mark.parametrize("max_in_degree", [None, 8]) @pytest.mark.parametrize("num_bases", [1, 2, 5]) @pytest.mark.parametrize("regularizer", [None, "basis"]) @@ -31,7 +30,8 @@ @pytest.mark.parametrize("to_block", [False, True]) @pytest.mark.parametrize("sparse_format", ["coo", "csc", None]) def test_relgraphconv_equality( - idtype_int, + dgl_graph_1, + idx_type, max_in_degree, num_bases, regularizer, @@ -42,6 +42,12 @@ def test_relgraphconv_equality( from dgl.nn.pytorch import RelGraphConv torch.manual_seed(12345) + device = torch.device("cuda") + g = dgl_graph_1.to(device).astype(idx_type) + + if to_block: + g = dgl.to_block(g) + in_feat, out_feat, num_rels = 10, 2, 3 args = (in_feat, out_feat, num_rels) kwargs = { @@ -50,16 +56,10 @@ def test_relgraphconv_equality( "bias": False, "self_loop": self_loop, } - g = create_graph1().to("cuda") - g.edata[dgl.ETYPE] = torch.randint(num_rels, (g.num_edges(),)).cuda() - - if idtype_int: - g = g.int() - if to_block: - g = dgl.to_block(g) + g.edata[dgl.ETYPE] = torch.randint(num_rels, (g.num_edges(),)).to(device) size = (g.num_src_nodes(), g.num_dst_nodes()) - feat = torch.rand(g.num_src_nodes(), in_feat).cuda() + feat = torch.rand(g.num_src_nodes(), in_feat).to(device) if sparse_format == "coo": sg = SparseGraph( @@ -76,18 +76,18 @@ def test_relgraphconv_equality( size=size, src_ids=indices, cdst_ids=offsets, values=etypes, formats="csc" ) - conv1 = RelGraphConv(*args, **kwargs).cuda() - conv2 = CuGraphRelGraphConv(*args, **kwargs, apply_norm=False).cuda() + conv1 = RelGraphConv(*args, **kwargs).to(device) + conv2 = CuGraphRelGraphConv(*args, **kwargs, apply_norm=False).to(device) with torch.no_grad(): if self_loop: - conv2.W.data[:-1] = conv1.linear_r.W.data - conv2.W.data[-1] = conv1.loop_weight.data + conv2.W[:-1].copy_(conv1.linear_r.W) + conv2.W[-1].copy_(conv1.loop_weight) else: - conv2.W.data = conv1.linear_r.W.data.detach().clone() + conv2.W.copy_(conv1.linear_r.W) if regularizer is not None: - conv2.coeff.data = conv1.linear_r.coeff.data.detach().clone() + conv2.coeff.copy_(conv1.linear_r.coeff) out1 = conv1(g, feat, g.edata[dgl.ETYPE]) @@ -98,7 +98,7 @@ def test_relgraphconv_equality( assert torch.allclose(out1, out2, atol=ATOL) - grad_out = torch.rand_like(out1) + grad_out = torch.randn_like(out1) out1.backward(grad_out) out2.backward(grad_out) diff --git a/python/cugraph-dgl/tests/nn/test_sageconv.py b/python/cugraph-dgl/tests/nn/test_sageconv.py index b5d0a44b868..3f1c2b1b3fe 100644 --- a/python/cugraph-dgl/tests/nn/test_sageconv.py +++ b/python/cugraph-dgl/tests/nn/test_sageconv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -15,7 +15,6 @@ from cugraph_dgl.nn.conv.base import SparseGraph from cugraph_dgl.nn import SAGEConv as CuGraphSAGEConv -from .common import create_graph1 dgl = pytest.importorskip("dgl", reason="DGL not available") torch = pytest.importorskip("torch", reason="PyTorch not available") @@ -26,21 +25,19 @@ @pytest.mark.parametrize("aggr", ["mean", "pool"]) @pytest.mark.parametrize("bias", [False, True]) @pytest.mark.parametrize("bipartite", [False, True]) -@pytest.mark.parametrize("idtype_int", [False, True]) +@pytest.mark.parametrize("idx_type", [torch.int32, torch.int64]) @pytest.mark.parametrize("max_in_degree", [None, 8]) @pytest.mark.parametrize("to_block", [False, True]) @pytest.mark.parametrize("sparse_format", ["coo", "csc", None]) def test_sageconv_equality( - aggr, bias, bipartite, idtype_int, max_in_degree, to_block, sparse_format + dgl_graph_1, aggr, bias, bipartite, idx_type, max_in_degree, to_block, sparse_format ): from dgl.nn.pytorch import SAGEConv torch.manual_seed(12345) - kwargs = {"aggregator_type": aggr, "bias": bias} - g = create_graph1().to("cuda") + device = torch.device("cuda") + g = dgl_graph_1.to(device).astype(idx_type) - if idtype_int: - g = g.int() if to_block: g = dgl.to_block(g) @@ -49,12 +46,12 @@ def test_sageconv_equality( if bipartite: in_feats = (5, 3) feat = ( - torch.rand(size[0], in_feats[0], requires_grad=True).cuda(), - torch.rand(size[1], in_feats[1], requires_grad=True).cuda(), + torch.rand(size[0], in_feats[0], requires_grad=True).to(device), + torch.rand(size[1], in_feats[1], requires_grad=True).to(device), ) else: in_feats = 5 - feat = torch.rand(size[0], in_feats).cuda() + feat = torch.rand(size[0], in_feats).to(device) out_feats = 2 if sparse_format == "coo": @@ -65,18 +62,19 @@ def test_sageconv_equality( offsets, indices, _ = g.adj_tensors("csc") sg = SparseGraph(size=size, src_ids=indices, cdst_ids=offsets, formats="csc") - conv1 = SAGEConv(in_feats, out_feats, **kwargs).cuda() - conv2 = CuGraphSAGEConv(in_feats, out_feats, **kwargs).cuda() + kwargs = {"aggregator_type": aggr, "bias": bias} + conv1 = SAGEConv(in_feats, out_feats, **kwargs).to(device) + conv2 = CuGraphSAGEConv(in_feats, out_feats, **kwargs).to(device) in_feats_src = conv2.in_feats_src with torch.no_grad(): - conv2.lin.weight.data[:, :in_feats_src] = conv1.fc_neigh.weight.data - conv2.lin.weight.data[:, in_feats_src:] = conv1.fc_self.weight.data + conv2.lin.weight[:, :in_feats_src].copy_(conv1.fc_neigh.weight) + conv2.lin.weight[:, in_feats_src:].copy_(conv1.fc_self.weight) if bias: - conv2.lin.bias.data[:] = conv1.fc_self.bias.data + conv2.lin.bias.copy_(conv1.fc_self.bias) if aggr == "pool": - conv2.pre_lin.weight.data[:] = conv1.fc_pool.weight.data - conv2.pre_lin.bias.data[:] = conv1.fc_pool.bias.data + conv2.pre_lin.weight.copy_(conv1.fc_pool.weight) + conv2.pre_lin.bias.copy_(conv1.fc_pool.bias) out1 = conv1(g, feat) if sparse_format is not None: @@ -85,7 +83,7 @@ def test_sageconv_equality( out2 = conv2(g, feat, max_in_degree=max_in_degree) assert torch.allclose(out1, out2, atol=ATOL) - grad_out = torch.rand_like(out1) + grad_out = torch.randn_like(out1) out1.backward(grad_out) out2.backward(grad_out) assert torch.allclose( diff --git a/python/cugraph-dgl/tests/nn/test_transformerconv.py b/python/cugraph-dgl/tests/nn/test_transformerconv.py index 5ac4fd7bea7..28d13dedec8 100644 --- a/python/cugraph-dgl/tests/nn/test_transformerconv.py +++ b/python/cugraph-dgl/tests/nn/test_transformerconv.py @@ -1,4 +1,4 @@ -# Copyright (c) 
2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -15,7 +15,6 @@ from cugraph_dgl.nn.conv.base import SparseGraph from cugraph_dgl.nn import TransformerConv -from .common import create_graph1 dgl = pytest.importorskip("dgl", reason="DGL not available") torch = pytest.importorskip("torch", reason="PyTorch not available") @@ -26,27 +25,25 @@ @pytest.mark.parametrize("beta", [False, True]) @pytest.mark.parametrize("bipartite_node_feats", [False, True]) @pytest.mark.parametrize("concat", [False, True]) -@pytest.mark.parametrize("idtype_int", [False, True]) -@pytest.mark.parametrize("num_heads", [1, 2, 3, 4]) +@pytest.mark.parametrize("idx_type", [torch.int32, torch.int64]) +@pytest.mark.parametrize("num_heads", [1, 3, 4]) @pytest.mark.parametrize("to_block", [False, True]) @pytest.mark.parametrize("use_edge_feats", [False, True]) @pytest.mark.parametrize("sparse_format", ["coo", "csc", None]) def test_transformerconv( + dgl_graph_1, beta, bipartite_node_feats, concat, - idtype_int, + idx_type, num_heads, to_block, use_edge_feats, sparse_format, ): torch.manual_seed(12345) - device = "cuda" - g = create_graph1().to(device) - - if idtype_int: - g = g.int() + device = torch.device("cuda") + g = dgl_graph_1.to(device).astype(idx_type) if to_block: g = dgl.to_block(g) @@ -92,5 +89,5 @@ def test_transformerconv( else: out = conv(g, nfeat, efeat) - grad_out = torch.rand_like(out) + grad_out = torch.randn_like(out) out.backward(grad_out)
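
The most behavior-relevant part of the diff above is how `GATConv`/`GATv2Conv` now wire the residual projection and the bias: `lin_res` becomes a `nn.Linear` (with the bias folded in) only when the destination feature size differs from the output size, an `nn.Identity` otherwise, and a standalone `bias` parameter is kept only when no linear residual already carries one. The following is a minimal, self-contained sketch of that selection logic, not the library's actual class; the module name and the toy shapes are hypothetical, and only the branching mirrors the diff.

```python
# Minimal sketch (PyTorch only) of the residual/bias wiring introduced above.
# "ResidualWiringSketch" is a made-up name for illustration; it is not part of
# cugraph-dgl. Only the if/else structure mirrors the GATConv changes.
import torch
import torch.nn as nn


class ResidualWiringSketch(nn.Module):
    """Toy module showing how lin_res and bias are selected."""

    def __init__(self, in_feats_dst, out_feats, num_heads,
                 concat=True, residual=True, bias=True):
        super().__init__()
        out_dim = num_heads * out_feats if concat else out_feats

        if residual:
            # A projection is only needed when the destination feature size
            # differs from the output size; otherwise the skip connection is
            # an identity mapping.
            if in_feats_dst != out_dim:
                self.lin_res = nn.Linear(in_feats_dst, out_dim, bias=bias)
            else:
                self.lin_res = nn.Identity()
        else:
            self.register_buffer("lin_res", None)

        # A separate bias parameter is kept only when no Linear residual
        # already carries a bias term of its own.
        if bias and not isinstance(self.lin_res, nn.Linear):
            if concat:
                self.bias = nn.Parameter(torch.empty(num_heads, out_feats))
            else:
                self.bias = nn.Parameter(torch.empty(out_feats))
        else:
            self.register_buffer("bias", None)


# Example: in_feats_dst=8, out_feats=2, num_heads=4, concat=True gives
# out_dim == 8 == in_feats_dst, so lin_res is nn.Identity() and a separate
# (4, 2) bias parameter is created.
m = ResidualWiringSketch(in_feats_dst=8, out_feats=2, num_heads=4)
print(type(m.lin_res).__name__,
      None if m.bias is None else tuple(m.bias.shape))
```

This also explains why the `GATv2Conv` forward pass in the diff no longer adds `self.bias` explicitly at the end: when a linear residual is present, its own bias already serves that role.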