From a58d358eb3c182d7087c30aab9ed3b6bdbe636d3 Mon Sep 17 00:00:00 2001
From: Alexandria Barghi <abarghi@nvidia.com>
Date: Wed, 27 Dec 2023 10:31:19 -0800
Subject: [PATCH] remove test files

---
 .../bulk_sampling/mg_utils/README.md          |   6 -
 .../bulk_sampling/mg_utils/default-config.sh  |  39 ---
 .../bulk_sampling/mg_utils/functions.sh       |  66 -----
 .../mg_utils/run-dask-process.sh              | 247 ------------------
 .../mg_utils/wait_for_workers.py              | 124 ---------
 5 files changed, 482 deletions(-)
 delete mode 100644 benchmarks/cugraph/standalone/bulk_sampling/mg_utils/README.md
 delete mode 100755 benchmarks/cugraph/standalone/bulk_sampling/mg_utils/default-config.sh
 delete mode 100644 benchmarks/cugraph/standalone/bulk_sampling/mg_utils/functions.sh
 delete mode 100755 benchmarks/cugraph/standalone/bulk_sampling/mg_utils/run-dask-process.sh
 delete mode 100644 benchmarks/cugraph/standalone/bulk_sampling/mg_utils/wait_for_workers.py

diff --git a/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/README.md b/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/README.md
deleted file mode 100644
index 26dbbd5e705..00000000000
--- a/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/README.md
+++ /dev/null
@@ -1,6 +0,0 @@
-This directory contains various scripts helpful for cugraph users and developers.
-
-The following scripts were copied from https://github.com/rapidsai/multi-gpu-tools and are useful for starting a dask cluster, which is needed by cugraph for multi-GPU support.
-* `run-dask-process.sh`
-* `functions.sh`
-* `default-config.sh`
diff --git a/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/default-config.sh b/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/default-config.sh
deleted file mode 100755
index 26cef2aee78..00000000000
--- a/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/default-config.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-THIS_DIR=$(cd $(dirname ${BASH_SOURCE[0]}) && pwd)
-
-# Most are defined using the bash := or :- syntax, which means they
-# will be set only if they were previously unset. The project config
-# is loaded first, which gives it the opportunity to override anything
-# in this file that uses that syntax.  If there are variables in this
-# file that should not be overridded by a project, then they will
-# simply not use that syntax and override, since these variables are
-# read last.
-SCRIPTS_DIR=$THIS_DIR
-WORKSPACE=$THIS_DIR
-
-# These really should be oerridden by the project config!
-CONDA_ENV=${CONDA_ENV:-rapids}
-
-GPUS_PER_NODE=${GPUS_PER_NODE:-8}
-WORKER_RMM_POOL_SIZE=${WORKER_RMM_POOL_SIZE:-12G}
-DASK_CUDA_INTERFACE=${DASK_CUDA_INTERFACE:-ib0}
-DASK_SCHEDULER_PORT=${DASK_SCHEDULER_PORT:-8792}
-DASK_DEVICE_MEMORY_LIMIT=${DASK_DEVICE_MEMORY_LIMIT:-auto}
-DASK_HOST_MEMORY_LIMIT=${DASK_HOST_MEMORY_LIMIT:-auto}
-
-BUILD_LOG_FILE=${BUILD_LOG_FILE:-${RESULTS_DIR}/build_log.txt}
-SCHEDULER_FILE=${SCHEDULER_FILE:-${WORKSPACE}/dask-scheduler.json}
-DATE=${DATE:-$(date --utc "+%Y-%m-%d_%H:%M:%S")_UTC}
-ENV_EXPORT_FILE=${ENV_EXPORT_FILE:-${WORKSPACE}/$(basename ${CONDA_ENV})-${DATE}.txt}
diff --git a/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/functions.sh b/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/functions.sh
deleted file mode 100644
index 7eedb5f1b1f..00000000000
--- a/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/functions.sh
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# This file is source'd from script-env.sh to add functions to the
-# calling environment, hence no #!/bin/bash as the first line. This
-# also assumes the variables used in this file have been defined
-# elsewhere.
-
-NUMARGS=$#
-ARGS=$*
-function hasArg {
-    (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ")
-}
-
-function logger {
-  echo -e ">>>> $@"
-}
-
-# Calling "setTee outfile" will cause all stdout and stderr of the
-# current script to be output to "tee", which outputs to stdout and
-# "outfile" simultaneously. This is useful by allowing a script to
-# "tee" itself at any point without being called with tee.
-_origFileDescriptorsSaved=0
-function setTee {
-    if [[ $_origFileDescriptorsSaved == 0 ]]; then
-        # Save off the original file descr 1 and 2 as 3 and 4
-        exec 3>&1 4>&2
-        _origFileDescriptorsSaved=1
-    fi
-    teeFile=$1
-    # Create a named pipe.
-    pipeName=$(mktemp -u)
-    mkfifo $pipeName
-    # Close the currnet 1 and 2 and restore to original (3, 4) in the
-    # event this function is called repeatedly.
-    exec 1>&- 2>&-
-    exec 1>&3 2>&4
-    # Start a tee process reading from the named pipe. Redirect stdout
-    # and stderr to the named pipe which goes to the tee process. The
-    # named pipe "file" can be removed and the tee process stays alive
-    # until the fd is closed.
-    tee -a < $pipeName $teeFile &
-    exec > $pipeName 2>&1
-    rm $pipeName
-}
-
-# Call this to stop script output from going to "tee" after a prior
-# call to setTee.
-function unsetTee {
-    if [[ $_origFileDescriptorsSaved == 1 ]]; then
-        # Close the current fd 1 and 2 which should stop the tee
-        # process, then restore 1 and 2 to original (saved as 3, 4).
-        exec 1>&- 2>&-
-        exec 1>&3 2>&4
-    fi
-}
diff --git a/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/run-dask-process.sh b/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/run-dask-process.sh
deleted file mode 100755
index b88abb685ec..00000000000
--- a/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/run-dask-process.sh
+++ /dev/null
@@ -1,247 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-THIS_DIR=$(cd $(dirname ${BASH_SOURCE[0]}) && pwd)
-
-source ${THIS_DIR}/default-config.sh
-source ${THIS_DIR}/functions.sh
-
-# Logs can be written to a specific location by setting the LOGS_DIR
-# env var.
-LOGS_DIR=${LOGS_DIR:-dask_logs-$$}
-
-########################################
-NUMARGS=$#
-ARGS=$*
-function hasArg {
-    (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ")
-}
-VALIDARGS="-h --help scheduler workers --tcp --ucx --ucxib --ucx-ib"
-HELP="$0 [<app> ...] [<flag> ...]
- where <app> is:
-   scheduler               - start dask scheduler
-   workers                 - start dask workers
- and <flag> is:
-   --tcp                   - initalize a tcp cluster (default)
-   --ucx                   - initialize a ucx cluster with NVLink
-   --ucxib | --ucx-ib      - initialize a ucx cluster with IB+NVLink
-   -h | --help             - print this text
-
- The cluster config order of precedence is any specification on the
- command line (--tcp, --ucx, etc.) if provided, then the value of the
- env var CLUSTER_CONFIG_TYPE if set, then the default value of tcp.
-
-"
-
-# CLUSTER_CONFIG_TYPE defaults to the env var value if set, else TCP
-CLUSTER_CONFIG_TYPE=${CLUSTER_CONFIG_TYPE:-TCP}
-START_SCHEDULER=0
-START_WORKERS=0
-
-if (( ${NUMARGS} == 0 )); then
-    echo "${HELP}"
-    exit 0
-else
-    if hasArg -h || hasArg --help; then
-        echo "${HELP}"
-        exit 0
-    fi
-    for a in ${ARGS}; do
-        if ! (echo " ${VALIDARGS} " | grep -q " ${a} "); then
-            echo "Invalid option: ${a}"
-            exit 1
-        fi
-    done
-fi
-
-if hasArg scheduler; then
-    START_SCHEDULER=1
-fi
-if hasArg workers; then
-    START_WORKERS=1
-fi
-# Allow the command line to take precedence
-if hasArg --tcp; then
-    CLUSTER_CONFIG_TYPE=TCP
-elif hasArg --ucx; then
-    CLUSTER_CONFIG_TYPE=UCX
-elif hasArg --ucxib || hasArg --ucx-ib; then
-    CLUSTER_CONFIG_TYPE=UCXIB
-fi
-
-########################################
-
-#export DASK_LOGGING__DISTRIBUTED="DEBUG"
-
-#ulimit -n 100000
-
-SCHEDULER_LOG=${LOGS_DIR}/scheduler_log.txt
-WORKERS_LOG=${LOGS_DIR}/worker-${HOSTNAME}_log.txt
-
-
-function buildTcpArgs {
-    export DASK_DISTRIBUTED__COMM__TIMEOUTS__CONNECT="100s"
-    export DASK_DISTRIBUTED__COMM__TIMEOUTS__TCP="600s"
-    export DASK_DISTRIBUTED__COMM__RETRY__DELAY__MIN="1s"
-    export DASK_DISTRIBUTED__COMM__RETRY__DELAY__MAX="60s"
-    export DASK_DISTRIBUTED__WORKER__MEMORY__Terminate="False"
-
-    SCHEDULER_ARGS="--protocol=tcp
-                    --scheduler-file $SCHEDULER_FILE
-                "
-
-    WORKER_ARGS="--rmm-pool-size=$WORKER_RMM_POOL_SIZE
-             --rmm-async
-             --local-directory=/tmp/$LOGNAME
-             --scheduler-file=$SCHEDULER_FILE
-             --memory-limit=$DASK_HOST_MEMORY_LIMIT
-             --device-memory-limit=$DASK_DEVICE_MEMORY_LIMIT
-            "
-
-}
-
-function buildUCXWithInfinibandArgs {
-
-    export UCX_MAX_RNDV_RAILS=1
-    export UCX_MEMTYPE_REG_WHOLE_ALLOC_TYPES=cuda
-    export DASK_RMM__POOL_SIZE=0.5GB
-    export DASK_DISTRIBUTED__COMM__UCX__CREATE_CUDA_CONTEXT=True
-
-    SCHEDULER_ARGS="--protocol=ucx
-                --interface=$DASK_CUDA_INTERFACE
-                --scheduler-file $SCHEDULER_FILE
-               "
-
-    WORKER_ARGS="--interface=$DASK_CUDA_INTERFACE
-                --rmm-pool-size=$WORKER_RMM_POOL_SIZE
-                --rmm-maximum-pool-size=$WORKER_RMM_POOL_SIZE
-                --local-directory=/tmp/$LOGNAME
-                --scheduler-file=$SCHEDULER_FILE
-                --memory-limit=$DASK_HOST_MEMORY_LIMIT
-                --device-memory-limit=$DASK_DEVICE_MEMORY_LIMIT
-                --enable-jit-unspill
-                "
-}
-
-
-function buildUCXwithoutInfinibandArgs {
-
-    export UCX_TCP_CM_REUSEADDR=y
-    export UCX_MAX_RNDV_RAILS=1
-    export UCX_TCP_TX_SEG_SIZE=8M
-    export UCX_TCP_RX_SEG_SIZE=8M
-
-    export DASK_DISTRIBUTED__COMM__UCX__CUDA_COPY=True
-    export DASK_DISTRIBUTED__COMM__UCX__TCP=True
-    export DASK_DISTRIBUTED__COMM__UCX__NVLINK=True
-    export DASK_DISTRIBUTED__COMM__UCX__INFINIBAND=False
-    export DASK_DISTRIBUTED__COMM__UCX__RDMACM=False
-    export DASK_RMM__POOL_SIZE=0.5GB
-
-
-    SCHEDULER_ARGS="--protocol=ucx
-            --scheduler-file $SCHEDULER_FILE
-            "
-
-    WORKER_ARGS="--enable-tcp-over-ucx
-                --enable-nvlink
-                --disable-infiniband
-                --disable-rdmacm
-                --rmm-pool-size=$WORKER_RMM_POOL_SIZE
-                --rmm-maximum-pool-size=$WORKER_RMM_POOL_SIZE
-                --local-directory=/tmp/$LOGNAME
-                --scheduler-file=$SCHEDULER_FILE
-                --memory-limit=$DASK_HOST_MEMORY_LIMIT
-                --device-memory-limit=$DASK_DEVICE_MEMORY_LIMIT
-                --enable-jit-unspill
-                "
-}
-
-if [[ "$CLUSTER_CONFIG_TYPE" == "UCX" ]]; then
-    logger "Using cluster configurtion for UCX"
-    buildUCXwithoutInfinibandArgs
-elif [[ "$CLUSTER_CONFIG_TYPE" == "UCXIB" ]]; then
-    logger "Using cluster configurtion for UCX with Infiniband"
-    buildUCXWithInfinibandArgs
-else
-    logger "Using cluster configurtion for TCP"
-    buildTcpArgs
-fi
-
-
-########################################
-
-scheduler_pid=""
-worker_pid=""
-num_scheduler_tries=0
-
-function startScheduler {
-    mkdir -p $(dirname $SCHEDULER_FILE)
-    echo "RUNNING: \"python -m distributed.cli.dask_scheduler $SCHEDULER_ARGS\"" > $SCHEDULER_LOG
-    dask-scheduler $SCHEDULER_ARGS >> $SCHEDULER_LOG 2>&1 &
-    scheduler_pid=$!
-}
-
-mkdir -p $LOGS_DIR
-logger "Logs written to: $LOGS_DIR"
-
-if [[ $START_SCHEDULER == 1 ]]; then
-    rm -f $SCHEDULER_FILE $SCHEDULER_LOG $WORKERS_LOG
-
-    startScheduler
-    sleep 6
-    num_scheduler_tries=$(python -c "print($num_scheduler_tries+1)")
-
-    # Wait for the scheduler to start first before proceeding, since
-    # it may require several retries (if prior run left ports open
-    # that need time to close, etc.)
-    while [ ! -f "$SCHEDULER_FILE" ]; do
-        scheduler_alive=$(ps -p $scheduler_pid > /dev/null ; echo $?)
-        if [[ $scheduler_alive != 0 ]]; then
-            if [[ $num_scheduler_tries != 30 ]]; then
-                echo "scheduler failed to start, retry #$num_scheduler_tries"
-                startScheduler
-                sleep 6
-                num_scheduler_tries=$(echo $num_scheduler_tries+1 | bc)
-            else
-                echo "could not start scheduler, exiting."
-                exit 1
-            fi
-        fi
-    done
-    echo "scheduler started."
-fi
-
-if [[ $START_WORKERS == 1 ]]; then
-    rm -f $WORKERS_LOG
-    while [ ! -f "$SCHEDULER_FILE" ]; do
-        echo "run-dask-process.sh: $SCHEDULER_FILE not present - waiting to start workers..."
-        sleep 2
-    done
-    echo "RUNNING: \"python -m dask_cuda.cli.dask_cuda_worker $WORKER_ARGS\"" > $WORKERS_LOG
-    dask-cuda-worker $WORKER_ARGS >> $WORKERS_LOG 2>&1 &
-    worker_pid=$!
-    echo "worker(s) started."
-fi
-
-# This script will not return until the following background process
-# have been completed/killed.
-if [[ $worker_pid != "" ]]; then
-    echo "waiting for worker pid $worker_pid to finish before exiting script..."
-    wait $worker_pid
-fi
-if [[ $scheduler_pid != "" ]]; then
-    echo "waiting for scheduler pid $scheduler_pid to finish before exiting script..."
-    wait $scheduler_pid
-fi
diff --git a/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/wait_for_workers.py b/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/wait_for_workers.py
deleted file mode 100644
index 29d5cb7fbd7..00000000000
--- a/benchmarks/cugraph/standalone/bulk_sampling/mg_utils/wait_for_workers.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-import time
-import yaml
-
-from dask.distributed import Client
-
-
-def initialize_dask_cuda(communication_type):
-    communication_type = communication_type.lower()
-    if "ucx" in communication_type:
-        os.environ["UCX_MAX_RNDV_RAILS"] = "1"
-
-    if communication_type == "ucx-ib":
-        os.environ["UCX_MEMTYPE_REG_WHOLE_ALLOC_TYPES"]="cuda"
-        os.environ["DASK_RMM__POOL_SIZE"]="0.5GB"
-        os.environ["DASK_DISTRIBUTED__COMM__UCX__CREATE_CUDA_CONTEXT"]="True"
-
-
-def wait_for_workers(
-    num_expected_workers, scheduler_file_path, communication_type, timeout_after=0
-):
-    """
-    Waits until num_expected_workers workers are available based on
-    the workers managed by scheduler_file_path, then returns 0. If
-    timeout_after is specified, will return 1 if num_expected_workers
-    workers are not available before the timeout.
-    """
-    # FIXME: use scheduler file path from global environment if none
-    # supplied in configuration yaml
-
-    print("wait_for_workers.py - initializing client...", end="")
-    sys.stdout.flush()
-    initialize_dask_cuda(communication_type)
-    print("done.")
-    sys.stdout.flush()
-
-    ready = False
-    start_time = time.time()
-    while not ready:
-        if timeout_after and ((time.time() - start_time) >= timeout_after):
-            print(
-                f"wait_for_workers.py timed out after {timeout_after} seconds before finding {num_expected_workers} workers."
-            )
-            sys.stdout.flush()
-            break
-        with Client(scheduler_file=scheduler_file_path) as client:
-            num_workers = len(client.scheduler_info()["workers"])
-            if num_workers < num_expected_workers:
-                print(
-                    f"wait_for_workers.py expected {num_expected_workers} but got {num_workers}, waiting..."
-                )
-                sys.stdout.flush()
-                time.sleep(5)
-            else:
-                print(f"wait_for_workers.py got {num_workers} workers, done.")
-                sys.stdout.flush()
-                ready = True
-
-    if ready is False:
-        return 1
-    return 0
-
-
-if __name__ == "__main__":
-    import argparse
-
-    ap = argparse.ArgumentParser()
-    ap.add_argument(
-        "--num-expected-workers",
-        type=int,
-        required=False,
-        help="Number of workers to wait for. If not specified, "
-        "uses the NUM_WORKERS env var if set, otherwise defaults "
-        "to 16.",
-    )
-    ap.add_argument(
-        "--scheduler-file-path",
-        type=str,
-        required=True,
-        help="Path to shared scheduler file to read.",
-    )
-    ap.add_argument(
-        "--communication-type",
-        type=str,
-        default="tcp",
-        required=False,
-        help="Initiliaze dask_cuda based on the cluster communication type."
-        "Supported values are tcp(default), ucx, ucxib, ucx-ib.",
-    )
-    ap.add_argument(
-        "--timeout-after",
-        type=int,
-        default=0,
-        required=False,
-        help="Number of seconds to wait for workers. "
-        "Default is 0 which means wait forever.",
-    )
-    args = ap.parse_args()
-
-    if args.num_expected_workers is None:
-        args.num_expected_workers = os.environ.get("NUM_WORKERS", 16)
-
-    exitcode = wait_for_workers(
-        num_expected_workers=args.num_expected_workers,
-        scheduler_file_path=args.scheduler_file_path,
-        communication_type=args.communication_type,
-        timeout_after=args.timeout_after,
-    )
-
-    sys.exit(exitcode)