
Merge branch 'branch-24.02' into fix-gatconv
tingyu66 authored Jan 9, 2024
2 parents 38d87b7 + c7b720d commit 4072871
Showing 16 changed files with 213 additions and 45 deletions.
@@ -28,7 +28,7 @@
)

from cugraph.structure.symmetrize import symmetrize
from cugraph.experimental.gnn import BulkSampler
from cugraph.gnn import BulkSampler

import cugraph

2 changes: 1 addition & 1 deletion cpp/src/community/flatten_dendrogram.hpp
@@ -75,7 +75,7 @@ void leiden_partition_at_level(raft::handle_t const& handle,
thrust::make_counting_iterator<size_t>(0),
thrust::make_counting_iterator<size_t>((level - 1) / 2),
[&handle, &dendrogram, &local_vertex_ids_v, &d_partition, local_num_verts](size_t l) {
cugraph::relabel<vertex_t, false>(
cugraph::relabel<vertex_t, multi_gpu>(
handle,
std::tuple<vertex_t const*, vertex_t const*>(dendrogram.get_level_ptr_nocheck(2 * l + 1),
dendrogram.get_level_ptr_nocheck(2 * l + 2)),
2 changes: 1 addition & 1 deletion python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py
@@ -17,7 +17,7 @@
import cupy as cp
import cudf
from cugraph.utilities.utils import import_optional
from cugraph.experimental import BulkSampler
from cugraph.gnn import BulkSampler
from dask.distributed import default_client, Event
from cugraph_dgl.dataloading import (
HomogenousBulkSamplerDataset,
6 changes: 1 addition & 5 deletions python/cugraph-pyg/cugraph_pyg/data/__init__.py
@@ -11,8 +11,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from cugraph.utilities.api_tools import experimental_warning_wrapper

from cugraph_pyg.data.cugraph_store import EXPERIMENTAL__CuGraphStore

CuGraphStore = experimental_warning_wrapper(EXPERIMENTAL__CuGraphStore)
from cugraph_pyg.data.cugraph_store import CuGraphStore
2 changes: 1 addition & 1 deletion python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py
@@ -199,7 +199,7 @@ def cast(cls, *args, **kwargs):
return cls(*args, **kwargs)


class EXPERIMENTAL__CuGraphStore:
class CuGraphStore:
"""
Duck-typed version of PyG's GraphStore and FeatureStore.
"""
12 changes: 2 additions & 10 deletions python/cugraph-pyg/cugraph_pyg/loader/__init__.py
@@ -11,14 +11,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from cugraph.utilities.api_tools import experimental_warning_wrapper
from cugraph_pyg.loader.cugraph_node_loader import CuGraphNeighborLoader

from cugraph_pyg.loader.cugraph_node_loader import EXPERIMENTAL__CuGraphNeighborLoader

CuGraphNeighborLoader = experimental_warning_wrapper(
EXPERIMENTAL__CuGraphNeighborLoader
)

from cugraph_pyg.loader.cugraph_node_loader import EXPERIMENTAL__BulkSampleLoader

BulkSampleLoader = experimental_warning_wrapper(EXPERIMENTAL__BulkSampleLoader)
from cugraph_pyg.loader.cugraph_node_loader import BulkSampleLoader
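
Taken together, the cugraph-pyg __init__ changes above drop the EXPERIMENTAL__ prefixes and the experimental_warning_wrapper indirection, so the store and loader classes are now exported under their plain names. A minimal sketch of the promoted import paths (imports only; constructor arguments are not part of this diff):

# Sketch based on the renamed exports in this commit; assumes cugraph-pyg is installed.
from cugraph_pyg.data import CuGraphStore  # was EXPERIMENTAL__CuGraphStore
from cugraph_pyg.loader import CuGraphNeighborLoader, BulkSampleLoader  # were EXPERIMENTAL__*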
8 changes: 4 additions & 4 deletions python/cugraph-pyg/cugraph_pyg/loader/cugraph_node_loader.py
@@ -20,7 +20,7 @@
import cupy
import cudf

from cugraph.experimental.gnn import BulkSampler
from cugraph.gnn import BulkSampler
from cugraph.utilities.utils import import_optional, MissingModule

from cugraph_pyg.data import CuGraphStore
Expand All @@ -42,7 +42,7 @@
)


class EXPERIMENTAL__BulkSampleLoader:
class BulkSampleLoader:

__ex_parquet_file = re.compile(r"batch=([0-9]+)\-([0-9]+)\.parquet")

@@ -478,7 +478,7 @@ def __iter__(self):
return self


class EXPERIMENTAL__CuGraphNeighborLoader:
class CuGraphNeighborLoader:
def __init__(
self,
data: Union[CuGraphStore, Tuple[CuGraphStore, CuGraphStore]],
@@ -527,7 +527,7 @@ def batch_size(self) -> int:
return self.__batch_size

def __iter__(self):
self.current_loader = EXPERIMENTAL__BulkSampleLoader(
self.current_loader = BulkSampleLoader(
self.__feature_store,
self.__graph_store,
self.__input_nodes,
126 changes: 116 additions & 10 deletions python/cugraph/cugraph/datasets/dataset.py
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -12,10 +12,13 @@
# limitations under the License.

import cudf
import dask_cudf
import yaml
import os
import pandas as pd
import cugraph.dask as dcg
from pathlib import Path
import urllib.request
from cugraph.structure.graph_classes import Graph


@@ -138,9 +141,8 @@ def __download_csv(self, url):

filename = self.metadata["name"] + self.metadata["file_type"]
if self._dl_path.path.is_dir():
df = cudf.read_csv(url)
self._path = self._dl_path.path / filename
df.to_csv(self._path, index=False)
urllib.request.urlretrieve(url, str(self._path))

else:
raise RuntimeError(
@@ -149,7 +151,6 @@ def __download_csv(self, url):
return self._path

def unload(self):

"""
Remove all saved internal objects, forcing them to be re-created when
accessed.
@@ -162,7 +163,7 @@ def unload(self):

def get_edgelist(self, download=False, reader="cudf"):
"""
Return an Edgelist
Return an Edgelist.
Parameters
----------
@@ -212,6 +213,47 @@ def get_edgelist(self, download=False, reader="cudf"):

return self._edgelist.copy()

def get_dask_edgelist(self, download=False):
"""
Return a distributed Edgelist.
Parameters
----------
download : Boolean (default=False)
Automatically download the dataset from the 'url' location within
the YAML file.
"""
if self._edgelist is None:
full_path = self.get_path()
if not full_path.is_file():
if download:
full_path = self.__download_csv(self.metadata["url"])
else:
raise RuntimeError(
f"The datafile {full_path} does not"
" exist. Try setting download=True"
" to download the datafile"
)

header = None
if isinstance(self.metadata["header"], int):
header = self.metadata["header"]

blocksize = dcg.get_chunksize(full_path)
self._edgelist = dask_cudf.read_csv(
path=full_path,
blocksize=blocksize,
delimiter=self.metadata["delim"],
names=self.metadata["col_names"],
dtype={
self.metadata["col_names"][i]: self.metadata["col_types"][i]
for i in range(len(self.metadata["col_types"]))
},
header=header,
)

return self._edgelist.copy()

def get_graph(
self,
download=False,
@@ -249,10 +291,10 @@ def get_graph(
if create_using is None:
G = Graph()
elif isinstance(create_using, Graph):
# what about BFS if trnaposed is True
# what about BFS if transposed is True
attrs = {"directed": create_using.is_directed()}
G = type(create_using)(**attrs)
elif type(create_using) is type:
elif issubclass(create_using, Graph):
G = create_using()
else:
raise TypeError(
@@ -277,9 +319,74 @@ def get_graph(
)
return G

def get_dask_graph(
self,
download=False,
create_using=Graph,
ignore_weights=False,
store_transposed=False,
):
"""
Return a distributed Graph object.
Parameters
----------
download : Boolean (default=False)
Downloads the dataset from the web.
create_using: cugraph.Graph (instance or class), optional
(default=Graph)
Specify the type of Graph to create. Can pass in an instance to
create a Graph instance with specified 'directed' attribute.
ignore_weights : Boolean (default=False)
Ignores weights in the dataset if True, resulting in an
unweighted Graph. If False (the default), weights from the
dataset -if present- will be applied to the Graph. If the
dataset does not contain weights, the Graph returned will
be unweighted regardless of ignore_weights.
store_transposed : bool, optional (default=False)
If True, stores the transpose of the adjacency matrix. Required
for certain algorithms.
"""
if self._edgelist is None:
self.get_dask_edgelist(download)

if create_using is None:
G = Graph()
elif isinstance(create_using, Graph):
attrs = {"directed": create_using.is_directed()}
G = type(create_using)(**attrs)
elif issubclass(create_using, Graph):
G = create_using()
else:
raise TypeError(
"create_using must be a cugraph.Graph "
"(or subclass) type or instance, got: "
f"{type(create_using)}"
)

if len(self.metadata["col_names"]) > 2 and not (ignore_weights):
G.from_dask_cudf_edgelist(
self._edgelist,
source=self.metadata["col_names"][0],
destination=self.metadata["col_names"][1],
edge_attr=self.metadata["col_names"][2],
store_transposed=store_transposed,
)
else:
G.from_dask_cudf_edgelist(
self._edgelist,
source=self.metadata["col_names"][0],
destination=self.metadata["col_names"][1],
store_transposed=store_transposed,
)
return G

def get_path(self):
"""
Returns the location of the stored dataset file
Returns the location of the stored dataset file.
"""
if self._path is None:
self._path = self._dl_path.path / (
@@ -347,8 +454,7 @@ def download_all(force=False):
filename = meta["name"] + meta["file_type"]
save_to = default_download_dir.path / filename
if not save_to.is_file() or force:
df = cudf.read_csv(meta["url"])
df.to_csv(save_to, index=False)
urllib.request.urlretrieve(meta["url"], str(save_to))


def set_download_dir(path):
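
The get_dask_edgelist and get_dask_graph methods added above mirror the single-GPU get_edgelist/get_graph accessors, but read the CSV through dask_cudf (with a blocksize from cugraph.dask.get_chunksize) and build the graph via from_dask_cudf_edgelist. A minimal usage sketch, assuming a dask-cuda cluster and cugraph comms are already running (cluster setup is outside this diff):

# Sketch only: requires an active Dask CUDA cluster/client and initialized cugraph comms.
from cugraph.datasets import karate

ddf = karate.get_dask_edgelist(download=True)  # distributed dask_cudf.DataFrame
G = karate.get_dask_graph(download=True)       # multi-GPU Graph (reuses the cached edgelist)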
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/experimental/__init__.py
@@ -48,9 +48,9 @@
experimental_warning_wrapper(EXPERIMENTAL__find_bicliques)
)

from cugraph.gnn.data_loading import EXPERIMENTAL__BulkSampler
from cugraph.gnn.data_loading import BulkSampler

BulkSampler = experimental_warning_wrapper(EXPERIMENTAL__BulkSampler)
BulkSampler = promoted_experimental_warning_wrapper(BulkSampler)


from cugraph.link_prediction.jaccard import jaccard, jaccard_coefficient
6 changes: 3 additions & 3 deletions python/cugraph/cugraph/experimental/gnn/__init__.py
@@ -11,7 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from cugraph.gnn.data_loading import EXPERIMENTAL__BulkSampler
from cugraph.utilities.api_tools import experimental_warning_wrapper
from cugraph.gnn.data_loading import BulkSampler
from cugraph.utilities.api_tools import promoted_experimental_warning_wrapper

BulkSampler = experimental_warning_wrapper(EXPERIMENTAL__BulkSampler)
BulkSampler = promoted_experimental_warning_wrapper(BulkSampler)
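
With BulkSampler promoted out of the experimental namespace, cugraph.gnn becomes the canonical import location, while the cugraph.experimental paths keep working through promoted_experimental_warning_wrapper (assumed, from its name, to warn that the API has been promoted). A minimal sketch of the two import paths after this change:

# Assumption: the legacy path still resolves but goes through the promotion-warning shim.
from cugraph.gnn import BulkSampler  # stable location
from cugraph.experimental.gnn import BulkSampler as LegacyBulkSampler  # legacy, expected to warn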
1 change: 1 addition & 0 deletions python/cugraph/cugraph/gnn/__init__.py
@@ -12,3 +12,4 @@
# limitations under the License.

from .feature_storage.feat_storage import FeatureStore
from .data_loading.bulk_sampler import BulkSampler
2 changes: 1 addition & 1 deletion python/cugraph/cugraph/gnn/data_loading/__init__.py
@@ -11,4 +11,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from cugraph.gnn.data_loading.bulk_sampler import EXPERIMENTAL__BulkSampler
from cugraph.gnn.data_loading.bulk_sampler import BulkSampler
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py
@@ -31,7 +31,7 @@
import time


class EXPERIMENTAL__BulkSampler:
class BulkSampler:
"""
Performs sampling based on input seeds grouped into batches by
a batch id. Writes the output minibatches to parquet, with
@@ -158,7 +158,7 @@ def add_batches(
Examples
--------
>>> import cudf
>>> from cugraph.experimental.gnn import BulkSampler
>>> from cugraph.gnn import BulkSampler
>>> from cugraph.datasets import karate
>>> import tempfile
>>> df = cudf.DataFrame({
2 changes: 1 addition & 1 deletion python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py
@@ -17,7 +17,7 @@
import cupy
import cugraph
from cugraph.datasets import karate, email_Eu_core
from cugraph.experimental.gnn import BulkSampler
from cugraph.gnn import BulkSampler
from cugraph.utilities.utils import create_directory_with_overwrite

import os
@@ -22,7 +22,7 @@
import cugraph
import dask_cudf
from cugraph.datasets import karate, email_Eu_core
from cugraph.experimental import BulkSampler
from cugraph.gnn import BulkSampler
from cugraph.utilities.utils import create_directory_with_overwrite

