diff --git a/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py b/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py index 1ca5d6db637..9de6c3a2b01 100644 --- a/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py +++ b/benchmarks/cugraph/standalone/bulk_sampling/cugraph_bulk_sampling.py @@ -28,7 +28,7 @@ ) from cugraph.structure.symmetrize import symmetrize -from cugraph.experimental.gnn import BulkSampler +from cugraph.gnn import BulkSampler import cugraph diff --git a/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/README.md b/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/README.md new file mode 100644 index 00000000000..26dbbd5e705 --- /dev/null +++ b/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/README.md @@ -0,0 +1,6 @@ +This directory contains various scripts helpful for cugraph users and developers. + +The following scripts were copied from https://github.com/rapidsai/multi-gpu-tools and are useful for starting a dask cluster, which is needed by cugraph for multi-GPU support. +* `run-dask-process.sh` +* `functions.sh` +* `default-config.sh` diff --git a/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/default-config.sh b/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/default-config.sh new file mode 100755 index 00000000000..26cef2aee78 --- /dev/null +++ b/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/default-config.sh @@ -0,0 +1,39 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +THIS_DIR=$(cd $(dirname ${BASH_SOURCE[0]}) && pwd) + +# Most are defined using the bash := or :- syntax, which means they +# will be set only if they were previously unset. The project config +# is loaded first, which gives it the opportunity to override anything +# in this file that uses that syntax. If there are variables in this +# file that should not be overridded by a project, then they will +# simply not use that syntax and override, since these variables are +# read last. +SCRIPTS_DIR=$THIS_DIR +WORKSPACE=$THIS_DIR + +# These really should be oerridden by the project config! +CONDA_ENV=${CONDA_ENV:-rapids} + +GPUS_PER_NODE=${GPUS_PER_NODE:-8} +WORKER_RMM_POOL_SIZE=${WORKER_RMM_POOL_SIZE:-12G} +DASK_CUDA_INTERFACE=${DASK_CUDA_INTERFACE:-ib0} +DASK_SCHEDULER_PORT=${DASK_SCHEDULER_PORT:-8792} +DASK_DEVICE_MEMORY_LIMIT=${DASK_DEVICE_MEMORY_LIMIT:-auto} +DASK_HOST_MEMORY_LIMIT=${DASK_HOST_MEMORY_LIMIT:-auto} + +BUILD_LOG_FILE=${BUILD_LOG_FILE:-${RESULTS_DIR}/build_log.txt} +SCHEDULER_FILE=${SCHEDULER_FILE:-${WORKSPACE}/dask-scheduler.json} +DATE=${DATE:-$(date --utc "+%Y-%m-%d_%H:%M:%S")_UTC} +ENV_EXPORT_FILE=${ENV_EXPORT_FILE:-${WORKSPACE}/$(basename ${CONDA_ENV})-${DATE}.txt} diff --git a/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/functions.sh b/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/functions.sh new file mode 100644 index 00000000000..7eedb5f1b1f --- /dev/null +++ b/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/functions.sh @@ -0,0 +1,66 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file is source'd from script-env.sh to add functions to the +# calling environment, hence no #!/bin/bash as the first line. This +# also assumes the variables used in this file have been defined +# elsewhere. + +NUMARGS=$# +ARGS=$* +function hasArg { + (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ") +} + +function logger { + echo -e ">>>> $@" +} + +# Calling "setTee outfile" will cause all stdout and stderr of the +# current script to be output to "tee", which outputs to stdout and +# "outfile" simultaneously. This is useful by allowing a script to +# "tee" itself at any point without being called with tee. +_origFileDescriptorsSaved=0 +function setTee { + if [[ $_origFileDescriptorsSaved == 0 ]]; then + # Save off the original file descr 1 and 2 as 3 and 4 + exec 3>&1 4>&2 + _origFileDescriptorsSaved=1 + fi + teeFile=$1 + # Create a named pipe. + pipeName=$(mktemp -u) + mkfifo $pipeName + # Close the currnet 1 and 2 and restore to original (3, 4) in the + # event this function is called repeatedly. + exec 1>&- 2>&- + exec 1>&3 2>&4 + # Start a tee process reading from the named pipe. Redirect stdout + # and stderr to the named pipe which goes to the tee process. The + # named pipe "file" can be removed and the tee process stays alive + # until the fd is closed. + tee -a < $pipeName $teeFile & + exec > $pipeName 2>&1 + rm $pipeName +} + +# Call this to stop script output from going to "tee" after a prior +# call to setTee. +function unsetTee { + if [[ $_origFileDescriptorsSaved == 1 ]]; then + # Close the current fd 1 and 2 which should stop the tee + # process, then restore 1 and 2 to original (saved as 3, 4). + exec 1>&- 2>&- + exec 1>&3 2>&4 + fi +} diff --git a/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/run-dask-process.sh b/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/run-dask-process.sh new file mode 100755 index 00000000000..b88abb685ec --- /dev/null +++ b/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/run-dask-process.sh @@ -0,0 +1,247 @@ +#!/bin/bash +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +THIS_DIR=$(cd $(dirname ${BASH_SOURCE[0]}) && pwd) + +source ${THIS_DIR}/default-config.sh +source ${THIS_DIR}/functions.sh + +# Logs can be written to a specific location by setting the LOGS_DIR +# env var. +LOGS_DIR=${LOGS_DIR:-dask_logs-$$} + +######################################## +NUMARGS=$# +ARGS=$* +function hasArg { + (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ") +} +VALIDARGS="-h --help scheduler workers --tcp --ucx --ucxib --ucx-ib" +HELP="$0 [ ...] [ ...] + where is: + scheduler - start dask scheduler + workers - start dask workers + and is: + --tcp - initalize a tcp cluster (default) + --ucx - initialize a ucx cluster with NVLink + --ucxib | --ucx-ib - initialize a ucx cluster with IB+NVLink + -h | --help - print this text + + The cluster config order of precedence is any specification on the + command line (--tcp, --ucx, etc.) if provided, then the value of the + env var CLUSTER_CONFIG_TYPE if set, then the default value of tcp. + +" + +# CLUSTER_CONFIG_TYPE defaults to the env var value if set, else TCP +CLUSTER_CONFIG_TYPE=${CLUSTER_CONFIG_TYPE:-TCP} +START_SCHEDULER=0 +START_WORKERS=0 + +if (( ${NUMARGS} == 0 )); then + echo "${HELP}" + exit 0 +else + if hasArg -h || hasArg --help; then + echo "${HELP}" + exit 0 + fi + for a in ${ARGS}; do + if ! (echo " ${VALIDARGS} " | grep -q " ${a} "); then + echo "Invalid option: ${a}" + exit 1 + fi + done +fi + +if hasArg scheduler; then + START_SCHEDULER=1 +fi +if hasArg workers; then + START_WORKERS=1 +fi +# Allow the command line to take precedence +if hasArg --tcp; then + CLUSTER_CONFIG_TYPE=TCP +elif hasArg --ucx; then + CLUSTER_CONFIG_TYPE=UCX +elif hasArg --ucxib || hasArg --ucx-ib; then + CLUSTER_CONFIG_TYPE=UCXIB +fi + +######################################## + +#export DASK_LOGGING__DISTRIBUTED="DEBUG" + +#ulimit -n 100000 + +SCHEDULER_LOG=${LOGS_DIR}/scheduler_log.txt +WORKERS_LOG=${LOGS_DIR}/worker-${HOSTNAME}_log.txt + + +function buildTcpArgs { + export DASK_DISTRIBUTED__COMM__TIMEOUTS__CONNECT="100s" + export DASK_DISTRIBUTED__COMM__TIMEOUTS__TCP="600s" + export DASK_DISTRIBUTED__COMM__RETRY__DELAY__MIN="1s" + export DASK_DISTRIBUTED__COMM__RETRY__DELAY__MAX="60s" + export DASK_DISTRIBUTED__WORKER__MEMORY__Terminate="False" + + SCHEDULER_ARGS="--protocol=tcp + --scheduler-file $SCHEDULER_FILE + " + + WORKER_ARGS="--rmm-pool-size=$WORKER_RMM_POOL_SIZE + --rmm-async + --local-directory=/tmp/$LOGNAME + --scheduler-file=$SCHEDULER_FILE + --memory-limit=$DASK_HOST_MEMORY_LIMIT + --device-memory-limit=$DASK_DEVICE_MEMORY_LIMIT + " + +} + +function buildUCXWithInfinibandArgs { + + export UCX_MAX_RNDV_RAILS=1 + export UCX_MEMTYPE_REG_WHOLE_ALLOC_TYPES=cuda + export DASK_RMM__POOL_SIZE=0.5GB + export DASK_DISTRIBUTED__COMM__UCX__CREATE_CUDA_CONTEXT=True + + SCHEDULER_ARGS="--protocol=ucx + --interface=$DASK_CUDA_INTERFACE + --scheduler-file $SCHEDULER_FILE + " + + WORKER_ARGS="--interface=$DASK_CUDA_INTERFACE + --rmm-pool-size=$WORKER_RMM_POOL_SIZE + --rmm-maximum-pool-size=$WORKER_RMM_POOL_SIZE + --local-directory=/tmp/$LOGNAME + --scheduler-file=$SCHEDULER_FILE + --memory-limit=$DASK_HOST_MEMORY_LIMIT + --device-memory-limit=$DASK_DEVICE_MEMORY_LIMIT + --enable-jit-unspill + " +} + + +function buildUCXwithoutInfinibandArgs { + + export UCX_TCP_CM_REUSEADDR=y + export UCX_MAX_RNDV_RAILS=1 + export UCX_TCP_TX_SEG_SIZE=8M + export UCX_TCP_RX_SEG_SIZE=8M + + export DASK_DISTRIBUTED__COMM__UCX__CUDA_COPY=True + export DASK_DISTRIBUTED__COMM__UCX__TCP=True + export DASK_DISTRIBUTED__COMM__UCX__NVLINK=True + export DASK_DISTRIBUTED__COMM__UCX__INFINIBAND=False + export DASK_DISTRIBUTED__COMM__UCX__RDMACM=False + export DASK_RMM__POOL_SIZE=0.5GB + + + SCHEDULER_ARGS="--protocol=ucx + --scheduler-file $SCHEDULER_FILE + " + + WORKER_ARGS="--enable-tcp-over-ucx + --enable-nvlink + --disable-infiniband + --disable-rdmacm + --rmm-pool-size=$WORKER_RMM_POOL_SIZE + --rmm-maximum-pool-size=$WORKER_RMM_POOL_SIZE + --local-directory=/tmp/$LOGNAME + --scheduler-file=$SCHEDULER_FILE + --memory-limit=$DASK_HOST_MEMORY_LIMIT + --device-memory-limit=$DASK_DEVICE_MEMORY_LIMIT + --enable-jit-unspill + " +} + +if [[ "$CLUSTER_CONFIG_TYPE" == "UCX" ]]; then + logger "Using cluster configurtion for UCX" + buildUCXwithoutInfinibandArgs +elif [[ "$CLUSTER_CONFIG_TYPE" == "UCXIB" ]]; then + logger "Using cluster configurtion for UCX with Infiniband" + buildUCXWithInfinibandArgs +else + logger "Using cluster configurtion for TCP" + buildTcpArgs +fi + + +######################################## + +scheduler_pid="" +worker_pid="" +num_scheduler_tries=0 + +function startScheduler { + mkdir -p $(dirname $SCHEDULER_FILE) + echo "RUNNING: \"python -m distributed.cli.dask_scheduler $SCHEDULER_ARGS\"" > $SCHEDULER_LOG + dask-scheduler $SCHEDULER_ARGS >> $SCHEDULER_LOG 2>&1 & + scheduler_pid=$! +} + +mkdir -p $LOGS_DIR +logger "Logs written to: $LOGS_DIR" + +if [[ $START_SCHEDULER == 1 ]]; then + rm -f $SCHEDULER_FILE $SCHEDULER_LOG $WORKERS_LOG + + startScheduler + sleep 6 + num_scheduler_tries=$(python -c "print($num_scheduler_tries+1)") + + # Wait for the scheduler to start first before proceeding, since + # it may require several retries (if prior run left ports open + # that need time to close, etc.) + while [ ! -f "$SCHEDULER_FILE" ]; do + scheduler_alive=$(ps -p $scheduler_pid > /dev/null ; echo $?) + if [[ $scheduler_alive != 0 ]]; then + if [[ $num_scheduler_tries != 30 ]]; then + echo "scheduler failed to start, retry #$num_scheduler_tries" + startScheduler + sleep 6 + num_scheduler_tries=$(echo $num_scheduler_tries+1 | bc) + else + echo "could not start scheduler, exiting." + exit 1 + fi + fi + done + echo "scheduler started." +fi + +if [[ $START_WORKERS == 1 ]]; then + rm -f $WORKERS_LOG + while [ ! -f "$SCHEDULER_FILE" ]; do + echo "run-dask-process.sh: $SCHEDULER_FILE not present - waiting to start workers..." + sleep 2 + done + echo "RUNNING: \"python -m dask_cuda.cli.dask_cuda_worker $WORKER_ARGS\"" > $WORKERS_LOG + dask-cuda-worker $WORKER_ARGS >> $WORKERS_LOG 2>&1 & + worker_pid=$! + echo "worker(s) started." +fi + +# This script will not return until the following background process +# have been completed/killed. +if [[ $worker_pid != "" ]]; then + echo "waiting for worker pid $worker_pid to finish before exiting script..." + wait $worker_pid +fi +if [[ $scheduler_pid != "" ]]; then + echo "waiting for scheduler pid $scheduler_pid to finish before exiting script..." + wait $scheduler_pid +fi diff --git a/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/wait_for_workers.py b/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/wait_for_workers.py new file mode 100644 index 00000000000..29d5cb7fbd7 --- /dev/null +++ b/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/wait_for_workers.py @@ -0,0 +1,124 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import time +import yaml + +from dask.distributed import Client + + +def initialize_dask_cuda(communication_type): + communication_type = communication_type.lower() + if "ucx" in communication_type: + os.environ["UCX_MAX_RNDV_RAILS"] = "1" + + if communication_type == "ucx-ib": + os.environ["UCX_MEMTYPE_REG_WHOLE_ALLOC_TYPES"]="cuda" + os.environ["DASK_RMM__POOL_SIZE"]="0.5GB" + os.environ["DASK_DISTRIBUTED__COMM__UCX__CREATE_CUDA_CONTEXT"]="True" + + +def wait_for_workers( + num_expected_workers, scheduler_file_path, communication_type, timeout_after=0 +): + """ + Waits until num_expected_workers workers are available based on + the workers managed by scheduler_file_path, then returns 0. If + timeout_after is specified, will return 1 if num_expected_workers + workers are not available before the timeout. + """ + # FIXME: use scheduler file path from global environment if none + # supplied in configuration yaml + + print("wait_for_workers.py - initializing client...", end="") + sys.stdout.flush() + initialize_dask_cuda(communication_type) + print("done.") + sys.stdout.flush() + + ready = False + start_time = time.time() + while not ready: + if timeout_after and ((time.time() - start_time) >= timeout_after): + print( + f"wait_for_workers.py timed out after {timeout_after} seconds before finding {num_expected_workers} workers." + ) + sys.stdout.flush() + break + with Client(scheduler_file=scheduler_file_path) as client: + num_workers = len(client.scheduler_info()["workers"]) + if num_workers < num_expected_workers: + print( + f"wait_for_workers.py expected {num_expected_workers} but got {num_workers}, waiting..." + ) + sys.stdout.flush() + time.sleep(5) + else: + print(f"wait_for_workers.py got {num_workers} workers, done.") + sys.stdout.flush() + ready = True + + if ready is False: + return 1 + return 0 + + +if __name__ == "__main__": + import argparse + + ap = argparse.ArgumentParser() + ap.add_argument( + "--num-expected-workers", + type=int, + required=False, + help="Number of workers to wait for. If not specified, " + "uses the NUM_WORKERS env var if set, otherwise defaults " + "to 16.", + ) + ap.add_argument( + "--scheduler-file-path", + type=str, + required=True, + help="Path to shared scheduler file to read.", + ) + ap.add_argument( + "--communication-type", + type=str, + default="tcp", + required=False, + help="Initiliaze dask_cuda based on the cluster communication type." + "Supported values are tcp(default), ucx, ucxib, ucx-ib.", + ) + ap.add_argument( + "--timeout-after", + type=int, + default=0, + required=False, + help="Number of seconds to wait for workers. " + "Default is 0 which means wait forever.", + ) + args = ap.parse_args() + + if args.num_expected_workers is None: + args.num_expected_workers = os.environ.get("NUM_WORKERS", 16) + + exitcode = wait_for_workers( + num_expected_workers=args.num_expected_workers, + scheduler_file_path=args.scheduler_file_path, + communication_type=args.communication_type, + timeout_after=args.timeout_after, + ) + + sys.exit(exitcode) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py index f154b096256..11139910931 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py @@ -17,7 +17,7 @@ import cupy as cp import cudf from cugraph.utilities.utils import import_optional -from cugraph.experimental import BulkSampler +from cugraph.gnn import BulkSampler from dask.distributed import default_client, Event from cugraph_dgl.dataloading import ( HomogenousBulkSamplerDataset, diff --git a/python/cugraph-pyg/cugraph_pyg/data/__init__.py b/python/cugraph-pyg/cugraph_pyg/data/__init__.py index 0567b69ecf2..66a9843c047 100644 --- a/python/cugraph-pyg/cugraph_pyg/data/__init__.py +++ b/python/cugraph-pyg/cugraph_pyg/data/__init__.py @@ -11,8 +11,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cugraph.utilities.api_tools import experimental_warning_wrapper - -from cugraph_pyg.data.cugraph_store import EXPERIMENTAL__CuGraphStore - -CuGraphStore = experimental_warning_wrapper(EXPERIMENTAL__CuGraphStore) +from cugraph_pyg.data.cugraph_store import CuGraphStore diff --git a/python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py b/python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py index 14dc5d84f90..05d540b7c45 100644 --- a/python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py +++ b/python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py @@ -199,7 +199,7 @@ def cast(cls, *args, **kwargs): return cls(*args, **kwargs) -class EXPERIMENTAL__CuGraphStore: +class CuGraphStore: """ Duck-typed version of PyG's GraphStore and FeatureStore. """ diff --git a/python/cugraph-pyg/cugraph_pyg/loader/__init__.py b/python/cugraph-pyg/cugraph_pyg/loader/__init__.py index 0682c598fdf..2c3d7eff89e 100644 --- a/python/cugraph-pyg/cugraph_pyg/loader/__init__.py +++ b/python/cugraph-pyg/cugraph_pyg/loader/__init__.py @@ -11,14 +11,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cugraph.utilities.api_tools import experimental_warning_wrapper +from cugraph_pyg.loader.cugraph_node_loader import CuGraphNeighborLoader -from cugraph_pyg.loader.cugraph_node_loader import EXPERIMENTAL__CuGraphNeighborLoader - -CuGraphNeighborLoader = experimental_warning_wrapper( - EXPERIMENTAL__CuGraphNeighborLoader -) - -from cugraph_pyg.loader.cugraph_node_loader import EXPERIMENTAL__BulkSampleLoader - -BulkSampleLoader = experimental_warning_wrapper(EXPERIMENTAL__BulkSampleLoader) +from cugraph_pyg.loader.cugraph_node_loader import BulkSampleLoader diff --git a/python/cugraph-pyg/cugraph_pyg/loader/cugraph_node_loader.py b/python/cugraph-pyg/cugraph_pyg/loader/cugraph_node_loader.py index 200a82b460b..8a1db4edf29 100644 --- a/python/cugraph-pyg/cugraph_pyg/loader/cugraph_node_loader.py +++ b/python/cugraph-pyg/cugraph_pyg/loader/cugraph_node_loader.py @@ -20,7 +20,7 @@ import cupy import cudf -from cugraph.experimental.gnn import BulkSampler +from cugraph.gnn import BulkSampler from cugraph.utilities.utils import import_optional, MissingModule from cugraph_pyg.data import CuGraphStore @@ -42,7 +42,7 @@ ) -class EXPERIMENTAL__BulkSampleLoader: +class BulkSampleLoader: __ex_parquet_file = re.compile(r"batch=([0-9]+)\-([0-9]+)\.parquet") @@ -478,7 +478,7 @@ def __iter__(self): return self -class EXPERIMENTAL__CuGraphNeighborLoader: +class CuGraphNeighborLoader: def __init__( self, data: Union[CuGraphStore, Tuple[CuGraphStore, CuGraphStore]], @@ -527,7 +527,7 @@ def batch_size(self) -> int: return self.__batch_size def __iter__(self): - self.current_loader = EXPERIMENTAL__BulkSampleLoader( + self.current_loader = BulkSampleLoader( self.__feature_store, self.__graph_store, self.__input_nodes, diff --git a/python/cugraph/cugraph/experimental/gnn/__init__.py b/python/cugraph/cugraph/experimental/gnn/__init__.py index 2f06bb20abe..9c366a2ee28 100644 --- a/python/cugraph/cugraph/experimental/gnn/__init__.py +++ b/python/cugraph/cugraph/experimental/gnn/__init__.py @@ -11,7 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cugraph.gnn.data_loading import EXPERIMENTAL__BulkSampler -from cugraph.utilities.api_tools import experimental_warning_wrapper +from cugraph.gnn.data_loading import BulkSampler +from cugraph.utilities.api_tools import promoted_experimental_warning_wrapper -BulkSampler = experimental_warning_wrapper(EXPERIMENTAL__BulkSampler) +BulkSampler = promoted_experimental_warning_wrapper(BulkSampler) diff --git a/python/cugraph/cugraph/gnn/__init__.py b/python/cugraph/cugraph/gnn/__init__.py index a62e0cbd242..f8a3035440b 100644 --- a/python/cugraph/cugraph/gnn/__init__.py +++ b/python/cugraph/cugraph/gnn/__init__.py @@ -12,3 +12,4 @@ # limitations under the License. from .feature_storage.feat_storage import FeatureStore +from .data_loading.bulk_sampler import BulkSampler diff --git a/python/cugraph/cugraph/gnn/data_loading/__init__.py b/python/cugraph/cugraph/gnn/data_loading/__init__.py index 6150bf5b422..4b725fba75a 100644 --- a/python/cugraph/cugraph/gnn/data_loading/__init__.py +++ b/python/cugraph/cugraph/gnn/data_loading/__init__.py @@ -11,4 +11,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cugraph.gnn.data_loading.bulk_sampler import EXPERIMENTAL__BulkSampler +from cugraph.gnn.data_loading.bulk_sampler import BulkSampler diff --git a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py index dbfcb124ce5..ff72e0ea2d6 100644 --- a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py +++ b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler.py @@ -31,7 +31,7 @@ import time -class EXPERIMENTAL__BulkSampler: +class BulkSampler: """ Performs sampling based on input seeds grouped into batches by a batch id. Writes the output minibatches to parquet, with @@ -158,7 +158,7 @@ def add_batches( Examples -------- >>> import cudf - >>> from cugraph.experimental.gnn import BulkSampler + >>> from cugraph.gnn import BulkSampler >>> from cugraph.datasets import karate >>> import tempfile >>> df = cudf.DataFrame({ diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py index a945881394b..943681fb6ff 100644 --- a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py +++ b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py @@ -17,7 +17,7 @@ import cupy import cugraph from cugraph.datasets import karate, email_Eu_core -from cugraph.experimental.gnn import BulkSampler +from cugraph.gnn import BulkSampler from cugraph.utilities.utils import create_directory_with_overwrite import os diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py index aee81e5ffed..1f7c4277773 100644 --- a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py @@ -22,7 +22,7 @@ import cugraph import dask_cudf from cugraph.datasets import karate, email_Eu_core -from cugraph.experimental import BulkSampler +from cugraph.gnn import BulkSampler from cugraph.utilities.utils import create_directory_with_overwrite