rapidsai · alexbarghi-nv · Nov 18, 2024 · Nov 22, 2024 · Dec 2, 2024 · Dec 5, 2024
@@ -70,6 +70,7 @@ def __clear_graph(self):
         self.__graph = None
         self.__vertex_offsets = None
         self.__weight_attr = None
+        self.__numeric_edge_types = None
 
     def _put_edge_index(
         self,
@@ -240,6 +241,27 @@ def _vertex_offsets(self) -> Dict[str, int]:
 
         return dict(self.__vertex_offsets)
 
+    @property
+    def _vertex_offset_array(self) -> "torch.Tensor":
+        """
+        Returns a 1-D int64 CUDA tensor containing the offset of each vertex
+        type, ordered by sorted vertex type key, followed by one final entry
+        equal to the sum of the values returned by ``_num_vertices()``
+        (presumably the total vertex count across all types).
+        """
+
+        # Read the property once: every access to _vertex_offsets hands back
+        # a fresh dict copy, so indexing it per key inside the comprehension
+        # would rebuild that dict for each vertex type.
+        vertex_offsets = self._vertex_offsets
+
+        off = torch.tensor(
+            [vertex_offsets[k] for k in sorted(vertex_offsets)],
+            dtype=torch.int64,
+            device="cuda",
+        )
+
+        # Sum of the per-type counts, reshaped to (1,) so that it can be
+        # concatenated after the per-type offsets.
+        total = (
+            torch.tensor(
+                list(self._num_vertices().values()),
+                device="cuda",
+                dtype=torch.int64,
+            )
+            .sum()
+            .reshape((1,))
+        )
+
+        return torch.concat([off, total])
+
     @property
     def is_homogeneous(self) -> bool:
         return len(self._vertex_offsets) == 1
@@ -270,6 +292,38 @@ def __get_weight_tensor(
 
         return torch.concat(weights)
 
+    @property
+    def _numeric_edge_types(self) -> Tuple[List, "torch.Tensor", "torch.Tensor"]:
+        """
+        Maps canonical edge types to numeric edge and vertex type ids.
+
+        Returns a 3-tuple: the canonical edge types in sorted order (the entry
+        at position i corresponds to numeric edge type i), followed by two
+        int32 CUDA tensors holding the numeric source and destination vertex
+        type of each edge type.  Vertex types are numbered by their position
+        in the sorted vertex type keys.  The tuple is built on first access
+        and cached on the instance.
+        """
+
+        # Serve the cached tuple if it has already been built.
+        if self.__numeric_edge_types is not None:
+            return self.__numeric_edge_types
+
+        canonical_etypes = sorted(
+            self.__edge_indices.keys(leaves_only=True, include_nested=True)
+        )
+
+        # Numeric id of a vertex type == its rank among the sorted type keys.
+        vtype_ids = {
+            vtype: idx for idx, vtype in enumerate(sorted(self._vertex_offsets))
+        }
+
+        # Element 0 of a canonical edge type is the source vertex type and
+        # element 2 is the destination vertex type.
+        type_pairs = [
+            (vtype_ids[etype[0]], vtype_ids[etype[2]]) for etype in canonical_etypes
+        ]
+        src_ids = [src for src, _ in type_pairs]
+        dst_ids = [dst for _, dst in type_pairs]
+
+        self.__numeric_edge_types = (
+            canonical_etypes,
+            torch.tensor(src_ids, device="cuda", dtype=torch.int32),
+            torch.tensor(dst_ids, device="cuda", dtype=torch.int32),
+        )
+        return self.__numeric_edge_types
+
     def __get_edgelist(self):
         """
         Returns

@@ -128,6 +128,11 @@ def __init__(
             (None, edge_label_index),
         )
 
+        # Note reverse of standard convention here
+        if input_type is not None:
+            edge_label_index[0] += data[1]._vertex_offsets[input_type[0]]
+            edge_label_index[1] += data[1]._vertex_offsets[input_type[2]]
+
         self.__input_data = torch_geometric.sampler.EdgeSamplerInput(
             input_id=torch.arange(
                 edge_label_index[0].numel(), dtype=torch.int64, device="cuda"

@@ -188,11 +188,23 @@ def __init__(
             # Will eventually automatically convert these objects to cuGraph objects.
             raise NotImplementedError("Currently can't accept non-cugraph graphs")
 
+        feature_store, graph_store = data
+
         if compression is None:
-            compression = "CSR"
+            compression = "CSR" if graph_store.is_homogeneous else "COO"
         elif compression not in ["CSR", "COO"]:
             raise ValueError("Invalid value for compression (expected 'CSR' or 'COO')")
 
+        if not graph_store.is_homogeneous:
+            if compression != "COO":
+                raise ValueError(
+                    "Only COO format is supported for heterogeneous graphs!"
+                )
+            if directory is not None:
+                raise ValueError(
+                    "Writing to disk is not supported for heterogeneous graphs!"
+                )
+
         writer = (
             None
             if directory is None
@@ -203,8 +215,6 @@ def __init__(
             )
         )
 
-        feature_store, graph_store = data
-
         if weight_attr is not None:
             graph_store._set_weight_attr((feature_store, weight_attr))
 
@@ -221,6 +231,9 @@ def __init__(
                 with_replacement=replace,
                 local_seeds_per_call=local_seeds_per_call,
                 biased=(weight_attr is not None),
+                heterogeneous=(not graph_store.is_homogeneous),
+                vertex_type_offsets=graph_store._vertex_offset_array,
+                num_edge_types=len(graph_store.get_all_edge_attrs()),
             ),
             (feature_store, graph_store),
             batch_size=batch_size,

@@ -181,11 +181,23 @@ def __init__(
             # Will eventually automatically convert these objects to cuGraph objects.
             raise NotImplementedError("Currently can't accept non-cugraph graphs")
 
+        feature_store, graph_store = data
+
         if compression is None:
-            compression = "CSR"
+            compression = "CSR" if graph_store.is_homogeneous else "COO"
         elif compression not in ["CSR", "COO"]:
             raise ValueError("Invalid value for compression (expected 'CSR' or 'COO')")
 
+        if not graph_store.is_homogeneous:
+            if compression != "COO":
+                raise ValueError(
+                    "Only COO format is supported for heterogeneous graphs!"
+                )
+            if directory is not None:
+                raise ValueError(
+                    "Writing to disk is not supported for heterogeneous graphs!"
+                )
+
         writer = (
             None
             if directory is None
@@ -196,8 +208,6 @@ def __init__(
             )
         )
 
-        feature_store, graph_store = data
-
         if weight_attr is not None:
             graph_store._set_weight_attr((feature_store, weight_attr))
 
@@ -214,6 +224,9 @@ def __init__(
                 with_replacement=replace,
                 local_seeds_per_call=local_seeds_per_call,
                 biased=(weight_attr is not None),
+                heterogeneous=(not graph_store.is_homogeneous),
+                vertex_type_offsets=graph_store._vertex_offset_array,
+                num_edge_types=len(graph_store.get_all_edge_attrs()),
             ),
             (feature_store, graph_store),
             batch_size=batch_size,

@@ -109,6 +109,8 @@ def __init__(
             input_nodes,
             input_id,
         )
+        if input_type is not None:
+            input_nodes += data[1]._vertex_offsets[input_type]
 
         self.__input_data = torch_geometric.sampler.NodeSamplerInput(
             input_id=torch.arange(len(input_nodes), dtype=torch.int64, device="cuda")