Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEA] Support Heterogeneous Sampling in cuGraph-PyG #82

Draft
wants to merge 7 commits into
base: branch-25.02
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions python/cugraph-pyg/cugraph_pyg/data/graph_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def __clear_graph(self):
self.__graph = None
self.__vertex_offsets = None
self.__weight_attr = None
self.__numeric_edge_types = None

def _put_edge_index(
self,
Expand Down Expand Up @@ -240,6 +241,27 @@ def _vertex_offsets(self) -> Dict[str, int]:

return dict(self.__vertex_offsets)

@property
def _vertex_offset_array(self) -> "torch.Tensor":
    """
    Returns the vertex offsets as a single int64 CUDA tensor.

    The leading entries are the values of ``_vertex_offsets`` taken in
    sorted key order; the final entry is the sum of the values returned by
    ``_num_vertices()``.
    """

    # Read the property once: ``_vertex_offsets`` builds a new dict on every
    # access, so looking it up again for each key repeats that work.
    vertex_offsets = self._vertex_offsets

    off = torch.tensor(
        [vertex_offsets[k] for k in sorted(vertex_offsets)],
        dtype=torch.int64,
        device="cuda",
    )

    # Reduce the per-type counts to one value, reshaped to (1,) so that it
    # can be concatenated after the offsets.
    total = (
        torch.tensor(
            list(self._num_vertices().values()),
            device="cuda",
            dtype=torch.int64,
        )
        .sum()
        .reshape((1,))
    )

    return torch.concat([off, total])

@property
def is_homogeneous(self) -> bool:
    """
    Whether ``_vertex_offsets`` holds exactly one entry, i.e. the graph
    has a single vertex type.
    """
    num_vertex_types = len(self._vertex_offsets)
    return num_vertex_types == 1
Expand Down Expand Up @@ -270,6 +292,38 @@ def __get_weight_tensor(

return torch.concat(weights)

@property
def _numeric_edge_types(self) -> Tuple[List, "torch.Tensor", "torch.Tensor"]:
    """
    Returns the canonical edge types in order (the 0th canonical type corresponds
    to numeric edge type 0, etc.), along with the numeric source and destination
    vertex types for each edge type.

    The tuple is built on first access and cached; later accesses return
    the cached value until the cache attribute is reset to None.
    """

    if self.__numeric_edge_types is None:
        # Sorting the canonical edge types fixes the numbering: position i in
        # this list is numeric edge type i.
        sorted_keys = sorted(
            self.__edge_indices.keys(leaves_only=True, include_nested=True)
        )

        # Numeric vertex type = position of the type name in sorted order.
        vtype_table = {k: i for i, k in enumerate(sorted(self._vertex_offsets))}

        # Element 0 of a canonical edge type is looked up as the source vertex
        # type and element 2 as the destination vertex type.
        srcs = [vtype_table[can_etype[0]] for can_etype in sorted_keys]
        dsts = [vtype_table[can_etype[2]] for can_etype in sorted_keys]

        self.__numeric_edge_types = (
            sorted_keys,
            torch.tensor(srcs, device="cuda", dtype=torch.int32),
            torch.tensor(dsts, device="cuda", dtype=torch.int32),
        )

    return self.__numeric_edge_types

def __get_edgelist(self):
"""
Returns
Expand Down
5 changes: 5 additions & 0 deletions python/cugraph-pyg/cugraph_pyg/loader/link_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,11 @@ def __init__(
(None, edge_label_index),
)

# Note reverse of standard convention here
if input_type is not None:
edge_label_index[0] += data[1]._vertex_offsets[input_type[0]]
edge_label_index[1] += data[1]._vertex_offsets[input_type[2]]

self.__input_data = torch_geometric.sampler.EdgeSamplerInput(
input_id=torch.arange(
edge_label_index[0].numel(), dtype=torch.int64, device="cuda"
Expand Down
19 changes: 16 additions & 3 deletions python/cugraph-pyg/cugraph_pyg/loader/link_neighbor_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,11 +188,23 @@ def __init__(
# Will eventually automatically convert these objects to cuGraph objects.
raise NotImplementedError("Currently can't accept non-cugraph graphs")

feature_store, graph_store = data

if compression is None:
compression = "CSR"
compression = "CSR" if graph_store.is_homogeneous else "COO"
elif compression not in ["CSR", "COO"]:
raise ValueError("Invalid value for compression (expected 'CSR' or 'COO')")

if not graph_store.is_homogeneous:
if compression != "COO":
raise ValueError(
"Only COO format is supported for heterogeneous graphs!"
)
if directory is not None:
raise ValueError(
"Writing to disk is not supported for heterogeneous graphs!"
)

writer = (
None
if directory is None
Expand All @@ -203,8 +215,6 @@ def __init__(
)
)

feature_store, graph_store = data

if weight_attr is not None:
graph_store._set_weight_attr((feature_store, weight_attr))

Expand All @@ -221,6 +231,9 @@ def __init__(
with_replacement=replace,
local_seeds_per_call=local_seeds_per_call,
biased=(weight_attr is not None),
heterogeneous=(not graph_store.is_homogeneous),
vertex_type_offsets=graph_store._vertex_offset_array,
num_edge_types=len(graph_store.get_all_edge_attrs()),
),
(feature_store, graph_store),
batch_size=batch_size,
Expand Down
19 changes: 16 additions & 3 deletions python/cugraph-pyg/cugraph_pyg/loader/neighbor_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,11 +181,23 @@ def __init__(
# Will eventually automatically convert these objects to cuGraph objects.
raise NotImplementedError("Currently can't accept non-cugraph graphs")

feature_store, graph_store = data

if compression is None:
compression = "CSR"
compression = "CSR" if graph_store.is_homogeneous else "COO"
elif compression not in ["CSR", "COO"]:
raise ValueError("Invalid value for compression (expected 'CSR' or 'COO')")

if not graph_store.is_homogeneous:
if compression != "COO":
raise ValueError(
"Only COO format is supported for heterogeneous graphs!"
)
if directory is not None:
raise ValueError(
"Writing to disk is not supported for heterogeneous graphs!"
)

writer = (
None
if directory is None
Expand All @@ -196,8 +208,6 @@ def __init__(
)
)

feature_store, graph_store = data

if weight_attr is not None:
graph_store._set_weight_attr((feature_store, weight_attr))

Expand All @@ -214,6 +224,9 @@ def __init__(
with_replacement=replace,
local_seeds_per_call=local_seeds_per_call,
biased=(weight_attr is not None),
heterogeneous=(not graph_store.is_homogeneous),
vertex_type_offsets=graph_store._vertex_offset_array,
num_edge_types=len(graph_store.get_all_edge_attrs()),
),
(feature_store, graph_store),
batch_size=batch_size,
Expand Down
2 changes: 2 additions & 0 deletions python/cugraph-pyg/cugraph_pyg/loader/node_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ def __init__(
input_nodes,
input_id,
)
if input_type is not None:
input_nodes += data[1]._vertex_offsets[input_type]

self.__input_data = torch_geometric.sampler.NodeSamplerInput(
input_id=torch.arange(len(input_nodes), dtype=torch.int64, device="cuda")
Expand Down
Loading