e2e smoke test
alexbarghi-nv committed Sep 28, 2023
1 parent 13be49c commit 185143c
Showing 2 changed files with 117 additions and 4 deletions.
9 changes: 8 additions & 1 deletion python/cugraph-pyg/cugraph_pyg/sampler/cugraph_sampler.py
@@ -225,7 +225,14 @@ def _sampler_output_from_sampling_results_homogeneous_csr(
    num_edges_per_hop_dict = {edge_type: major_offsets[label_hop_offsets].diff().cpu()}

    label_hop_offsets = label_hop_offsets.cpu()
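    # Per-hop node counts: gaps between consecutive label_hop_offsets, plus the
    # remainder of the renumber map past the last offset (the final hop).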
    num_nodes_per_hop_dict = {node_type: torch.concat([label_hop_offsets.diff(), (renumber_map.shape[0] - label_hop_offsets[-1]).reshape((1,))])}
    num_nodes_per_hop_dict = {
        node_type: torch.concat(
            [
                label_hop_offsets.diff(),
                (renumber_map.shape[0] - label_hop_offsets[-1]).reshape((1,)),
            ]
        )
    }

    noi_index = {node_type: torch.as_tensor(renumber_map, device="cuda")}

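For reference, a minimal sketch (not part of the commit) of the per-hop node-count arithmetic above, assuming label-hop offsets of [0, 1, 4] and a six-entry renumber map, the values used by the bogus_samples fixture further down:

import torch

# Assumed example values mirroring the bogus_samples fixture in the tests below.
label_hop_offsets = torch.tensor([0, 1, 4])
renumber_map = torch.arange(6)  # renumber_map_offsets == [0, 6] -> six renumbered vertices

# Hop sizes are the gaps between consecutive offsets; the final hop holds
# whatever remains of the renumber map past the last offset.
num_nodes_per_hop = torch.concat(
    [
        label_hop_offsets.diff(),
        (renumber_map.shape[0] - label_hop_offsets[-1]).reshape((1,)),
    ]
)
print(num_nodes_per_hop.tolist())  # [1, 3, 2], as asserted in the updated test below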
112 changes: 109 additions & 3 deletions python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_loader.py
@@ -22,6 +22,8 @@
from cugraph_pyg.loader import CuGraphNeighborLoader
from cugraph_pyg.loader import BulkSampleLoader
from cugraph_pyg.data import CuGraphStore
from cugraph_pyg.nn import SAGEConv as CuGraphSAGEConv

from cugraph.gnn import FeatureStore
from cugraph.utilities.utils import import_optional, MissingModule

@@ -243,7 +245,7 @@ def test_cugraph_loader_from_disk_subset_csr():
    for sample in loader:
        num_samples += 1
        assert sample["t0"]["num_nodes"] == 6
        # correct vertex order is [0, 1, 2, 6, 4, 3, 5]; x = [1, 2, 3, 7, 5, 4, 6]

        assert sample["t0"]["x"].tolist() == [1, 2, 3, 4, 5, 6]

        edge_index = sample[("t0", "knows", "t0")]["adj_t"]
@@ -257,8 +259,8 @@
        )
        assert row.tolist() == bogus_samples.minors.dropna().values_host.tolist()

        #assert sample['t0']['num_sampled_nodes'].tolist() == [1, 3, 2]
        assert sample['t0','knows','t0']['num_sampled_edges'].tolist() == [3, 5]
        assert sample["t0"]["num_sampled_nodes"].tolist() == [1, 3, 2]
        assert sample["t0", "knows", "t0"]["num_sampled_edges"].tolist() == [3, 5]

    assert num_samples == 100

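Similarly, a small sketch (again assuming the offsets used in the e2e fixture below) of how the sampler's major_offsets yield the [3, 5] per-hop edge counts asserted above:

import torch

# Assumed example values from the e2e test fixture below.
major_offsets = torch.tensor([0, 3, 5, 7, 8])
label_hop_offsets = torch.tensor([0, 1, 4])

# Edges per hop: index major_offsets at the hop boundaries, then diff.
num_edges_per_hop = major_offsets[label_hop_offsets].diff()
print(num_edges_per_hop.tolist())  # [3, 5]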
@@ -327,3 +329,107 @@ def test_cugraph_loader_e2e_coo():
        x = x.narrow(dim=0, start=0, length=x.shape[0] - num_sampled_nodes[1])

        assert list(x.shape) == [3, 1]


@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available")
@pytest.mark.parametrize("framework", ["pyg", "cugraph-ops"])
def test_cugraph_loader_e2e_csc(framework):
    m = [2, 9, 99, 82, 9, 3, 18, 1, 12]
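    # 256-dimensional random float features for the 256 "t0" vertices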
    x = torch.randint(3000, (256, 256)).to(torch.float32)
    F = FeatureStore()
    F.add_data(x, "t0", "x")

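    # Graph metadata: 9999 "knows" edges over 256 "t0" vertices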
    G = {("t0", "knows", "t0"): 9999}
    N = {"t0": 256}

    cugraph_store = CuGraphStore(F, G, N)

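    # Synthetic bulk-sampler output in compressed form: major_offsets and minors
    # encode the sampled edges, label_hop_offsets marks the hop boundaries, and
    # renumber_map_offsets delimits this batch's renumber map.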
    bogus_samples = cudf.DataFrame(
        {
            "major_offsets": [0, 3, 5, 7, 8, None, None, None],
            "minors": [1, 2, 3, 0, 3, 4, 5, 1],
            "edge_type": cudf.Series([0, 0, 0, 0, 0, 0, 0, 0], dtype="int32"),
            "edge_id": [5, 10, 15, 20, 25, 30, 35, 40],
            "label_hop_offsets": cudf.Series(
                [0, 1, 4, None, None, None, None, None], dtype="int32"
            ),
            "renumber_map_offsets": cudf.Series([0, 6], dtype="int32"),
        }
    )
    map = cudf.Series(m, name="map")
    bogus_samples = bogus_samples.join(map, how="outer").sort_index()

    tempdir = tempfile.TemporaryDirectory()
    for s in range(256):
        bogus_samples["batch_id"] = cupy.int32(s)
        bogus_samples.to_parquet(os.path.join(tempdir.name, f"batch={s}-{s}.parquet"))

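    # Read back 100 of the 256 batch files through the bulk sample loader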
    loader = BulkSampleLoader(
        feature_store=cugraph_store,
        graph_store=cugraph_store,
        directory=tempdir,
        input_files=list(os.listdir(tempdir.name))[100:200],
    )

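    # Two-layer GraphSAGE stack: native PyG convs or their cugraph-ops counterparts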
    if framework == "pyg":
        convs = [
            torch_geometric.nn.SAGEConv(256, 64, aggr="mean").cuda(),
            torch_geometric.nn.SAGEConv(64, 1, aggr="mean").cuda(),
        ]
    else:
        convs = [
            CuGraphSAGEConv(256, 64, aggr="mean").cuda(),
            CuGraphSAGEConv(64, 1, aggr="mean").cuda(),
        ]

    trim = trim_to_layer.TrimToLayer()
    relu = torch.nn.functional.relu
    dropout = torch.nn.functional.dropout

    for hetero_data in loader:
        x = hetero_data["t0"]["x"].cuda()

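        # PyG convs consume a COO edge_index; the cugraph-ops convs consume the
        # compressed representation of adj_t plus the number of nodes.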
        if framework == "pyg":
            ei = hetero_data["t0", "knows", "t0"]["adj_t"].coo()
            ei = torch.stack((ei[0], ei[1]))
        else:
            ei = hetero_data["t0", "knows", "t0"]["adj_t"].csr()
            ei = [ei[1], ei[0], x.shape[0]]

        num_sampled_nodes = hetero_data["t0"]["num_sampled_nodes"]
        num_sampled_edges = hetero_data["t0", "knows", "t0"]["num_sampled_edges"]

        s = x.shape[0]
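        # Per-layer loop: trim the batch down to the receptive field layer i needs,
        # then apply conv -> relu -> dropout.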
        for i in range(len(convs)):
            if framework == "pyg":
                x, ei, _ = trim(i, num_sampled_nodes, num_sampled_edges, x, ei, None)
            else:
                if i > 0:
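                    # Manual trim-to-layer for the cugraph-ops path: narrow away the
                    # outermost hop of nodes and edges no longer needed at this depth,
                    # mirroring what TrimToLayer does on the PyG branch.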
                    x = x.narrow(
                        dim=0,
                        start=0,
                        length=s - num_sampled_nodes[-i],
                    )

                    ei[0] = ei[0].narrow(
                        dim=0,
                        start=0,
                        length=ei[0].size(0) - num_sampled_edges[-i],
                    )
                    ei[1] = ei[1].narrow(
                        dim=0, start=0, length=ei[1].size(0) - num_sampled_nodes[-i]
                    )
                    ei[2] = x.size(0)

            s = x.shape[0]

            if framework == "pyg":
                x = convs[i](x, ei, size=(s, s))
            else:
                x = convs[i](x, ei)
            x = relu(x)
            x = dropout(x, p=0.5)

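        # Drop the hop-1 vertices so only the seed vertex remains; each batch yields
        # a single 1-dimensional output, hence the [1, 1] shape.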
        x = x.narrow(dim=0, start=0, length=s - num_sampled_nodes[1])

        assert list(x.shape) == [1, 1]
