e2e smoke test
alexbarghi-nv committed Sep 28, 2023
1 parent 13be49c commit 185143c
Showing 2 changed files with 117 additions and 4 deletions.
9 changes: 8 additions & 1 deletion python/cugraph-pyg/cugraph_pyg/sampler/cugraph_sampler.py
@@ -225,7 +225,14 @@ def _sampler_output_from_sampling_results_homogeneous_csr(
    num_edges_per_hop_dict = {edge_type: major_offsets[label_hop_offsets].diff().cpu()}

    label_hop_offsets = label_hop_offsets.cpu()
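    # Per-hop node counts: gaps between consecutive label_hop_offsets, plus the
    # remainder of the renumber map past the last offset (the final hop).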
    num_nodes_per_hop_dict = {node_type: torch.concat([label_hop_offsets.diff(), (renumber_map.shape[0] - label_hop_offsets[-1]).reshape((1,))])}
    num_nodes_per_hop_dict = {
        node_type: torch.concat(
            [
                label_hop_offsets.diff(),
                (renumber_map.shape[0] - label_hop_offsets[-1]).reshape((1,)),
            ]
        )
    }

    noi_index = {node_type: torch.as_tensor(renumber_map, device="cuda")}

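For reference, a minimal sketch (not part of the commit) of the per-hop node-count arithmetic above, assuming label-hop offsets of [0, 1, 4] and a six-entry renumber map, the values used by the bogus_samples fixture further down:

import torch

# Assumed example values mirroring the bogus_samples fixture in the tests below.
label_hop_offsets = torch.tensor([0, 1, 4])
renumber_map = torch.arange(6)  # renumber_map_offsets == [0, 6] -> six renumbered vertices

# Hop sizes are the gaps between consecutive offsets; the final hop holds
# whatever remains of the renumber map past the last offset.
num_nodes_per_hop = torch.concat(
    [
        label_hop_offsets.diff(),
        (renumber_map.shape[0] - label_hop_offsets[-1]).reshape((1,)),
    ]
)
print(num_nodes_per_hop.tolist())  # [1, 3, 2], as asserted in the updated test below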
112 changes: 109 additions & 3 deletions python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_loader.py
@@ -22,6 +22,8 @@
from cugraph_pyg.loader import CuGraphNeighborLoader
from cugraph_pyg.loader import BulkSampleLoader
from cugraph_pyg.data import CuGraphStore
from cugraph_pyg.nn import SAGEConv as CuGraphSAGEConv

from cugraph.gnn import FeatureStore
from cugraph.utilities.utils import import_optional, MissingModule

@@ -243,7 +245,7 @@ def test_cugraph_loader_from_disk_subset_csr():
    for sample in loader:
        num_samples += 1
        assert sample["t0"]["num_nodes"] == 6
        # correct vertex order is [0, 1, 2, 6, 4, 3, 5]; x = [1, 2, 3, 7, 5, 4, 6]

        assert sample["t0"]["x"].tolist() == [1, 2, 3, 4, 5, 6]

        edge_index = sample[("t0", "knows", "t0")]["adj_t"]
@@ -257,8 +259,8 @@
        )
        assert row.tolist() == bogus_samples.minors.dropna().values_host.tolist()

        #assert sample['t0']['num_sampled_nodes'].tolist() == [1, 3, 2]
        assert sample['t0','knows','t0']['num_sampled_edges'].tolist() == [3, 5]
        assert sample["t0"]["num_sampled_nodes"].tolist() == [1, 3, 2]
        assert sample["t0", "knows", "t0"]["num_sampled_edges"].tolist() == [3, 5]

    assert num_samples == 100

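Similarly, a small sketch (again assuming the offsets used in the e2e fixture below) of how the sampler's major_offsets yield the [3, 5] per-hop edge counts asserted above:

import torch

# Assumed example values from the e2e test fixture below.
major_offsets = torch.tensor([0, 3, 5, 7, 8])
label_hop_offsets = torch.tensor([0, 1, 4])

# Edges per hop: index major_offsets at the hop boundaries, then diff.
num_edges_per_hop = major_offsets[label_hop_offsets].diff()
print(num_edges_per_hop.tolist())  # [3, 5]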
@@ -327,3 +329,107 @@ def test_cugraph_loader_e2e_coo():
        x = x.narrow(dim=0, start=0, length=x.shape[0] - num_sampled_nodes[1])

        assert list(x.shape) == [3, 1]


@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available")
@pytest.mark.parametrize("framework", ["pyg", "cugraph-ops"])
def test_cugraph_loader_e2e_csc(framework):
    m = [2, 9, 99, 82, 9, 3, 18, 1, 12]
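    # 256-dimensional random float features for the 256 "t0" vertices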
    x = torch.randint(3000, (256, 256)).to(torch.float32)
    F = FeatureStore()
    F.add_data(x, "t0", "x")

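    # Graph metadata: 9999 "knows" edges over 256 "t0" vertices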
    G = {("t0", "knows", "t0"): 9999}
    N = {"t0": 256}

    cugraph_store = CuGraphStore(F, G, N)

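    # Synthetic bulk-sampler output in compressed form: major_offsets and minors
    # encode the sampled edges, label_hop_offsets marks the hop boundaries, and
    # renumber_map_offsets delimits this batch's renumber map.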
    bogus_samples = cudf.DataFrame(
        {
            "major_offsets": [0, 3, 5, 7, 8, None, None, None],
            "minors": [1, 2, 3, 0, 3, 4, 5, 1],
            "edge_type": cudf.Series([0, 0, 0, 0, 0, 0, 0, 0], dtype="int32"),
            "edge_id": [5, 10, 15, 20, 25, 30, 35, 40],
            "label_hop_offsets": cudf.Series(
                [0, 1, 4, None, None, None, None, None], dtype="int32"
            ),
            "renumber_map_offsets": cudf.Series([0, 6], dtype="int32"),
        }
    )
    map = cudf.Series(m, name="map")
    bogus_samples = bogus_samples.join(map, how="outer").sort_index()

    tempdir = tempfile.TemporaryDirectory()
    for s in range(256):
        bogus_samples["batch_id"] = cupy.int32(s)
        bogus_samples.to_parquet(os.path.join(tempdir.name, f"batch={s}-{s}.parquet"))

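    # Read back 100 of the 256 batch files through the bulk sample loader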
    loader = BulkSampleLoader(
        feature_store=cugraph_store,
        graph_store=cugraph_store,
        directory=tempdir,
        input_files=list(os.listdir(tempdir.name))[100:200],
    )

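    # Two-layer GraphSAGE stack: native PyG convs or their cugraph-ops counterparts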
    if framework == "pyg":
        convs = [
            torch_geometric.nn.SAGEConv(256, 64, aggr="mean").cuda(),
            torch_geometric.nn.SAGEConv(64, 1, aggr="mean").cuda(),
        ]
    else:
        convs = [
            CuGraphSAGEConv(256, 64, aggr="mean").cuda(),
            CuGraphSAGEConv(64, 1, aggr="mean").cuda(),
        ]

    trim = trim_to_layer.TrimToLayer()
    relu = torch.nn.functional.relu
    dropout = torch.nn.functional.dropout

    for hetero_data in loader:
        x = hetero_data["t0"]["x"].cuda()

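        # PyG convs consume a COO edge_index; the cugraph-ops convs consume the
        # compressed representation of adj_t plus the number of nodes.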
        if framework == "pyg":
            ei = hetero_data["t0", "knows", "t0"]["adj_t"].coo()
            ei = torch.stack((ei[0], ei[1]))
        else:
            ei = hetero_data["t0", "knows", "t0"]["adj_t"].csr()
            ei = [ei[1], ei[0], x.shape[0]]

        num_sampled_nodes = hetero_data["t0"]["num_sampled_nodes"]
        num_sampled_edges = hetero_data["t0", "knows", "t0"]["num_sampled_edges"]

        s = x.shape[0]
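        # Per-layer loop: trim the batch down to the receptive field layer i needs,
        # then apply conv -> relu -> dropout.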
        for i in range(len(convs)):
            if framework == "pyg":
                x, ei, _ = trim(i, num_sampled_nodes, num_sampled_edges, x, ei, None)
            else:
                if i > 0:
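                    # Manual trim-to-layer for the cugraph-ops path: narrow away the
                    # outermost hop of nodes and edges no longer needed at this depth,
                    # mirroring what TrimToLayer does on the PyG branch.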
                    x = x.narrow(
                        dim=0,
                        start=0,
                        length=s - num_sampled_nodes[-i],
                    )

                    ei[0] = ei[0].narrow(
                        dim=0,
                        start=0,
                        length=ei[0].size(0) - num_sampled_edges[-i],
                    )
                    ei[1] = ei[1].narrow(
                        dim=0, start=0, length=ei[1].size(0) - num_sampled_nodes[-i]
                    )
                    ei[2] = x.size(0)

            s = x.shape[0]

            if framework == "pyg":
                x = convs[i](x, ei, size=(s, s))
            else:
                x = convs[i](x, ei)
            x = relu(x)
            x = dropout(x, p=0.5)

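        # Drop the hop-1 vertices so only the seed vertex remains; each batch yields
        # a single 1-dimensional output, hence the [1, 1] shape.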
        x = x.narrow(dim=0, start=0, length=s - num_sampled_nodes[1])

        assert list(x.shape) == [1, 1]
