Merge branch 'sample-buffer' into pyg-linkpred

rapidsai · Aug 28, 2024 · 6553464 · 6553464
2 parents 2c97eb6 + 62a2fdf
commit 6553464
Show file tree

Hide file tree

Showing 15 changed files with 765 additions and 419 deletions.
diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py b/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py
@@ -194,10 +194,8 @@ def sample(
 
         if g.is_homogeneous:
             indices = torch.concat(list(indices))
-            ds.sample_from_nodes(indices.long(), batch_size=batch_size)
-            return HomogeneousSampleReader(
-                ds.get_reader(), self.output_format, self.edge_dir
-            )
+            reader = ds.sample_from_nodes(indices.long(), batch_size=batch_size)
+            return HomogeneousSampleReader(reader, self.output_format, self.edge_dir)
 
         raise ValueError(
             "Sampling heterogeneous graphs is currently"

diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py b/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py
@@ -20,7 +20,6 @@
     create_homogeneous_sampled_graphs_from_tensors_csc,
 )
 
-from cugraph.gnn import DistSampleReader
 
 from cugraph.utilities.utils import import_optional
 
@@ -33,14 +32,18 @@ class SampleReader:
     Iterator that processes results from the cuGraph distributed sampler.
     """
 
-    def __init__(self, base_reader: DistSampleReader, output_format: str = "dgl.Block"):
+    def __init__(
+        self,
+        base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]],
+        output_format: str = "dgl.Block",
+    ):
         """
         Constructs a new SampleReader.
 
         Parameters
         ----------
-        base_reader: DistSampleReader
-            The reader responsible for loading saved samples produced by
+        base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]
+            The iterator responsible for loading saved samples produced by
             the cuGraph distributed sampler.
         """
         self.__output_format = output_format
@@ -83,7 +86,7 @@ class HomogeneousSampleReader(SampleReader):
 
     def __init__(
         self,
-        base_reader: DistSampleReader,
+        base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]],
         output_format: str = "dgl.Block",
         edge_dir="in",
     ):
@@ -92,7 +95,7 @@ def __init__(
 
         Parameters
         ----------
-        base_reader: DistSampleReader
+        base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]
             The reader responsible for loading saved samples produced by
             the cuGraph distributed sampler.
         output_format: str

diff --git a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_mnmg.py b/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_mnmg.py
@@ -185,6 +185,8 @@ def run_train(
     wall_clock_start,
     tempdir=None,
     num_layers=3,
+    in_memory=False,
+    seeds_per_call=-1,
 ):
     optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0005)
 
@@ -196,20 +198,23 @@ def run_train(
     from cugraph_pyg.loader import NeighborLoader
 
     ix_train = split_idx["train"].cuda()
-    train_path = os.path.join(tempdir, f"train_{global_rank}")
-    os.mkdir(train_path)
+    train_path = None if in_memory else os.path.join(tempdir, f"train_{global_rank}")
+    if train_path:
+        os.mkdir(train_path)
     train_loader = NeighborLoader(
         data,
         input_nodes=ix_train,
         directory=train_path,
         shuffle=True,
         drop_last=True,
+        local_seeds_per_call=seeds_per_call if seeds_per_call > 0 else None,
         **kwargs,
     )
 
     ix_test = split_idx["test"].cuda()
-    test_path = os.path.join(tempdir, f"test_{global_rank}")
-    os.mkdir(test_path)
+    test_path = None if in_memory else os.path.join(tempdir, f"test_{global_rank}")
+    if test_path:
+        os.mkdir(test_path)
     test_loader = NeighborLoader(
         data,
         input_nodes=ix_test,
@@ -221,14 +226,16 @@ def run_train(
     )
 
     ix_valid = split_idx["valid"].cuda()
-    valid_path = os.path.join(tempdir, f"valid_{global_rank}")
-    os.mkdir(valid_path)
+    valid_path = None if in_memory else os.path.join(tempdir, f"valid_{global_rank}")
+    if valid_path:
+        os.mkdir(valid_path)
     valid_loader = NeighborLoader(
         data,
         input_nodes=ix_valid,
         directory=valid_path,
         shuffle=True,
         drop_last=True,
+        local_seeds_per_call=seeds_per_call if seeds_per_call > 0 else None,
         **kwargs,
     )
 
@@ -347,6 +354,9 @@ def parse_args():
     parser.add_argument("--skip_partition", action="store_true")
     parser.add_argument("--wg_mem_type", type=str, default="distributed")
 
+    parser.add_argument("--in_memory", action="store_true", default=False)
+    parser.add_argument("--seeds_per_call", type=int, default=-1)
+
     return parser.parse_args()
 
 
@@ -429,6 +439,8 @@ def parse_args():
                 wall_clock_start,
                 tempdir,
                 args.num_layers,
+                args.in_memory,
+                args.seeds_per_call,
             )
     else:
         warnings.warn("This script should be run with 'torchrun`.  Exiting.")
diff --git a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_sg.py b/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_sg.py
@@ -91,17 +91,28 @@ def test(loader: NeighborLoader, val_steps: Optional[int] = None):
 
 
 def create_loader(
-    data, num_neighbors, input_nodes, replace, batch_size, samples_dir, stage_name
+    data,
+    num_neighbors,
+    input_nodes,
+    replace,
+    batch_size,
+    samples_dir,
+    stage_name,
+    local_seeds_per_call,
 ):
-    directory = os.path.join(samples_dir, stage_name)
-    os.mkdir(directory)
+    if samples_dir is not None:
+        directory = os.path.join(samples_dir, stage_name)
+        os.mkdir(directory)
+    else:
+        directory = None
     return NeighborLoader(
         data,
         num_neighbors=num_neighbors,
         input_nodes=input_nodes,
         replace=replace,
         batch_size=batch_size,
         directory=directory,
+        local_seeds_per_call=local_seeds_per_call,
     )
 
 
@@ -147,6 +158,8 @@ def parse_args():
     parser.add_argument("--tempdir_root", type=str, default=None)
     parser.add_argument("--dataset_root", type=str, default="dataset")
     parser.add_argument("--dataset", type=str, default="ogbn-products")
+    parser.add_argument("--in_memory", action="store_true", default=False)
+    parser.add_argument("--seeds_per_call", type=int, default=-1)
 
     return parser.parse_args()
 
@@ -170,7 +183,10 @@ def parse_args():
             "num_neighbors": [args.fan_out] * args.num_layers,
             "replace": False,
             "batch_size": args.batch_size,
-            "samples_dir": samples_dir,
+            "samples_dir": None if args.in_memory else samples_dir,
+            "local_seeds_per_call": None
+            if args.seeds_per_call <= 0
+            else args.seeds_per_call,
         }
 
         train_loader = create_loader(

diff --git a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_snmg.py b/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_snmg.py
@@ -86,6 +86,8 @@ def run_train(
     wall_clock_start,
     tempdir=None,
     num_layers=3,
+    in_memory=False,
+    seeds_per_call=-1,
 ):
 
     init_pytorch_worker(
@@ -119,20 +121,23 @@ def run_train(
     dist.barrier()
 
     ix_train = torch.tensor_split(split_idx["train"], world_size)[rank].cuda()
-    train_path = os.path.join(tempdir, f"train_{rank}")
-    os.mkdir(train_path)
+    train_path = None if in_memory else os.path.join(tempdir, f"train_{rank}")
+    if train_path:
+        os.mkdir(train_path)
     train_loader = NeighborLoader(
         (feature_store, graph_store),
         input_nodes=ix_train,
         directory=train_path,
         shuffle=True,
         drop_last=True,
+        local_seeds_per_call=seeds_per_call if seeds_per_call > 0 else None,
         **kwargs,
     )
 
     ix_test = torch.tensor_split(split_idx["test"], world_size)[rank].cuda()
-    test_path = os.path.join(tempdir, f"test_{rank}")
-    os.mkdir(test_path)
+    test_path = None if in_memory else os.path.join(tempdir, f"test_{rank}")
+    if test_path:
+        os.mkdir(test_path)
     test_loader = NeighborLoader(
         (feature_store, graph_store),
         input_nodes=ix_test,
@@ -144,14 +149,16 @@ def run_train(
     )
 
     ix_valid = torch.tensor_split(split_idx["valid"], world_size)[rank].cuda()
-    valid_path = os.path.join(tempdir, f"valid_{rank}")
-    os.mkdir(valid_path)
+    valid_path = None if in_memory else os.path.join(tempdir, f"valid_{rank}")
+    if valid_path:
+        os.mkdir(valid_path)
     valid_loader = NeighborLoader(
         (feature_store, graph_store),
         input_nodes=ix_valid,
         directory=valid_path,
         shuffle=True,
         drop_last=True,
+        local_seeds_per_call=seeds_per_call if seeds_per_call > 0 else None,
         **kwargs,
     )
 
@@ -269,6 +276,8 @@ def run_train(
         parser.add_argument("--tempdir_root", type=str, default=None)
         parser.add_argument("--dataset_root", type=str, default="dataset")
         parser.add_argument("--dataset", type=str, default="ogbn-products")
+        parser.add_argument("--in_memory", action="store_true", default=False)
+        parser.add_argument("--seeds_per_call", type=int, default=-1)
 
         parser.add_argument(
             "--n_devices",
@@ -322,6 +331,8 @@ def run_train(
                     wall_clock_start,
                     tempdir,
                     args.num_layers,
+                    args.in_memory,
+                    args.seeds_per_call,
                 ),
                 nprocs=world_size,
                 join=True,

diff --git a/python/cugraph-pyg/cugraph_pyg/loader/neighbor_loader.py b/python/cugraph-pyg/cugraph_pyg/loader/neighbor_loader.py
@@ -12,7 +12,6 @@
 # limitations under the License.
 
 import warnings
-import tempfile
 
 from typing import Union, Tuple, Optional, Callable, List, Dict
 
@@ -123,14 +122,14 @@ def __init__(
             The number of input nodes per output minibatch.
             See torch.utils.dataloader.
         directory: str (optional, default=None)
-            The directory where samples will be temporarily stored.
-            It is recommend that this be set by the user, usually
-            setting it to a tempfile.TemporaryDirectory with a context
+            The directory where samples will be temporarily stored,
+            if spilling samples to disk.  If None, this loader
+            will perform buffered in-memory sampling.
+            If writing to disk, setting this argument
+            to a tempfile.TemporaryDirectory with a context
             manager is a good option but depending on the filesystem,
             you may want to choose an alternative location with fast I/O
             intead.
-            If not set, this will create a TemporaryDirectory that will
-            persist until this object is garbage collected.
             See cugraph.gnn.DistSampleWriter.
         batches_per_partition: int (optional, default=256)
             The number of batches per partition if writing samples to
@@ -182,20 +181,19 @@ def __init__(
             # Will eventually automatically convert these objects to cuGraph objects.
             raise NotImplementedError("Currently can't accept non-cugraph graphs")
 
-        if directory is None:
-            warnings.warn("Setting a directory to store samples is recommended.")
-            self._tempdir = tempfile.TemporaryDirectory()
-            directory = self._tempdir.name
-
         if compression is None:
             compression = "CSR"
         elif compression not in ["CSR", "COO"]:
             raise ValueError("Invalid value for compression (expected 'CSR' or 'COO')")
 
-        writer = DistSampleWriter(
-            directory=directory,
-            batches_per_partition=batches_per_partition,
-            format=format,
+        writer = (
+            None
+            if directory is None
+            else DistSampleWriter(
+                directory=directory,
+                batches_per_partition=batches_per_partition,
+                format=format,
+            )
         )
 
         feature_store, graph_store = data

diff --git a/python/cugraph-pyg/cugraph_pyg/sampler/sampler.py b/python/cugraph-pyg/cugraph_pyg/sampler/sampler.py
@@ -14,7 +14,7 @@
 from typing import Optional, Iterator, Union, Dict, Tuple
 
 from cugraph.utilities.utils import import_optional
-from cugraph.gnn import DistSampler, DistSampleReader
+from cugraph.gnn import DistSampler
 
 from .sampler_utils import filter_cugraph_pyg_store
 
@@ -152,13 +152,15 @@ class SampleReader:
     Iterator that processes results from the cuGraph distributed sampler.
     """
 
-    def __init__(self, base_reader: DistSampleReader):
+    def __init__(
+        self, base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]
+    ):
         """
         Constructs a new SampleReader.
 
         Parameters
         ----------
-        base_reader: DistSampleReader
+        base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]
             The reader responsible for loading saved samples produced by
             the cuGraph distributed sampler.
         """
@@ -202,14 +204,16 @@ class HomogeneousSampleReader(SampleReader):
     produced by the cuGraph distributed sampler.
     """
 
-    def __init__(self, base_reader: DistSampleReader):
+    def __init__(
+        self, base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]
+    ):
         """
         Constructs a new HomogeneousSampleReader
 
         Parameters
         ----------
-        base_reader: DistSampleReader
-            The reader responsible for loading saved samples produced by
+        base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]
+            The iterator responsible for loading saved samples produced by
             the cuGraph distributed sampler.
         """
         super().__init__(base_reader)
@@ -353,7 +357,7 @@ def sample_from_nodes(
             "torch_geometric.sampler.SamplerOutput",
         ]
     ]:
-        self.__sampler.sample_from_nodes(
+        reader = self.__sampler.sample_from_nodes(
             index.node, batch_size=self.__batch_size, **kwargs
         )
 
@@ -362,7 +366,7 @@ def sample_from_nodes(
             len(edge_attrs) == 1
             and edge_attrs[0].edge_type[0] == edge_attrs[0].edge_type[2]
         ):
-            return HomogeneousSampleReader(self.__sampler.get_reader())
+            return HomogeneousSampleReader(reader)
         else:
             # TODO implement heterogeneous sampling
             raise NotImplementedError(

diff --git a/python/cugraph/cugraph/gnn/data_loading/__init__.py b/python/cugraph/cugraph/gnn/data_loading/__init__.py
@@ -14,9 +14,12 @@
 from cugraph.gnn.data_loading.bulk_sampler import BulkSampler
 from cugraph.gnn.data_loading.dist_sampler import (
     DistSampler,
+    NeighborSampler,
+)
+from cugraph.gnn.data_loading.dist_io import (
     DistSampleWriter,
     DistSampleReader,
-    NeighborSampler,
+    BufferedSampleReader,
 )