diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 3ae4d94..1ec9e1e 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -18,6 +18,7 @@ jobs:
       - conda-cpp-tests
       - conda-python-build
       - conda-python-tests
+      - conda-notebook-tests
       - wheel-build-pylibwholegraph
       - wheel-tests-pylibwholegraph
       - wheel-build-cugraph-dgl
@@ -51,6 +52,7 @@ jobs:
           - '!CONTRIBUTING.md'
           - '!README.md'
           - '!docs/**'
+          - '!readme_pages/**'
         test_python:
           - '**'
           - '!.devcontainers/**'
@@ -59,6 +61,7 @@ jobs:
           - '!docs/**'
           - '!img/**'
           - '!notebooks/**'
+          - '!readme_pages/**'
   checks:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12
@@ -83,6 +86,17 @@ jobs:
     uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12
     with:
       build_type: pull-request
+  conda-notebook-tests:
+    needs: [conda-python-build, changed-files]
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12
+    if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_notebooks
+    with:
+      build_type: pull-request
+      node_type: "gpu-v100-latest-1"
+      arch: "amd64"
+      container_image: "rapidsai/ci-conda:cuda11.8.0-ubuntu22.04-py3.12"
+      run_script: "ci/test_notebooks.sh"
   conda-python-tests:
     needs: [conda-python-build, changed-files]
     secrets: inherit
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index cda7a18..8fa06d8 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -22,6 +22,18 @@ jobs:
       branch: ${{ inputs.branch }}
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
+  conda-notebook-tests:
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12
+    with:
+      build_type: nightly
+      branch: ${{ inputs.branch }}
+      date: ${{ inputs.date }}
+      sha: ${{ inputs.sha }}
+      node_type: "gpu-v100-latest-1"
+      arch: "amd64"
+      container_image: "rapidsai/ci-conda:cuda11.8.0-ubuntu22.04-py3.12"
+      run_script: "ci/test_notebooks.sh"
   conda-python-tests:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12
diff --git a/.gitignore b/.gitignore
index 93682a3..1ccc278 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,7 +15,7 @@ __pycache__
 DartConfiguration.tcl
 .DS_Store
 rmm_log.txt
-.ruff_cache
+.ruff_cache/
 
 # Unit test / coverage reports
 htmlcov/
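The `if:` guard on the new PR job is what keeps notebook tests off doc-only changes: the `changed-files` job publishes a JSON map of file groups, and `fromJSON(...).test_notebooks` is truthy only when that group matched at least one changed file. A rough stand-alone sketch of the same gate; the JSON shape shown here is an assumption for illustration, not the exact `shared-workflows` output:

```bash
#!/bin/bash
# Hypothetical sketch of the changed-file gating, assuming the output is a
# JSON object mapping each file group to the list of files it matched.
changed_file_groups='{"test_notebooks": ["notebooks/demo.ipynb"], "test_cpp": []}'

# Run the job only when the group is non-empty, mirroring the workflow's `if:`.
if [ "$(jq '.test_notebooks | length' <<< "${changed_file_groups}")" -gt 0 ]; then
    echo "run conda-notebook-tests"
else
    echo "skip conda-notebook-tests"
fi
```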
diff --git a/build.sh b/build.sh
index edece26..c6f4955 100755
--- a/build.sh
+++ b/build.sh
@@ -29,12 +29,15 @@ VALIDARGS="
    pylibwholegraph
    libwholegraph
    tests
+   benchmarks
    all
    -v
    -g
    -n
    --pydevelop
    --allgpuarch
+   --enable-nvshmem
+   --compile-cmd
    --clean
    -h
    --help
@@ -49,6 +52,7 @@ HELP="$0 [<target> ...] [<flag> ...]
    pylibwholegraph  - build the pylibwholegraph Python package
    libwholegraph    - build the libwholegraph library
    tests            - build the C++ tests
+   benchmarks       - build benchmarks
    all              - build everything
  and <flag> is:
    -v               - verbose build mode
@@ -56,6 +60,8 @@ HELP="$0 [<target> ...] [<flag> ...]
    -n               - do not install after a successful build (does not affect Python packages)
    --pydevelop      - install the Python packages in editable mode
    --allgpuarch     - build for all supported GPU architectures
+   --enable-nvshmem - build with nvshmem support (beta)
+   --compile-cmd    - only output compile commands (invoke CMake without build)
    --clean          - clean an individual target (note: to do a complete rebuild, use the clean target described above)
    -h               - print this text
@@ -140,11 +146,22 @@ if hasArg --pydevelop; then
     PYTHON_ARGS_FOR_INSTALL="${PYTHON_ARGS_FOR_INSTALL} -e"
 fi
 
+if hasArg --enable-nvshmem; then
+    BUILD_WITH_NVSHMEM=ON
+else
+    BUILD_WITH_NVSHMEM=OFF
+fi
 if hasArg tests; then
     BUILD_TESTS=ON
 else
     BUILD_TESTS=OFF
 fi
+if hasArg benchmarks; then
+    BUILD_BENCHMARKS=ON
+else
+    BUILD_BENCHMARKS=OFF
+fi
+
 
 # If clean or uninstall targets given, run them prior to any other steps
 if hasArg uninstall; then
@@ -250,7 +267,7 @@ if hasArg cugraph-pyg || buildDefault || hasArg all; then
     fi
 fi
 
-# Install the cugraph-dgl extensions for DGL
+# Build and install the cugraph-dgl Python package
 if hasArg cugraph-dgl || buildDefault ||hasArg all; then
     if hasArg --clean; then
         cleanPythonDir ${REPODIR}/python/cugraph-dgl
diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
index 7202637..ed464bd 100755
--- a/ci/build_cpp.sh
+++ b/ci/build_cpp.sh
@@ -13,7 +13,11 @@ export CMAKE_GENERATOR=Ninja
 
 rapids-print-env
 
-version=$(rapids-generate-version)
+# TODO: revert this once we start publishing nightly packages
+# from the 'cugraph-gnn' repo and stop publishing them from
+# the 'cugraph' / 'wholegraph' repos
+#version=$(rapids-generate-version)
+version="24.12.00a1000"
 
 rapids-logger "Begin cpp build"
diff --git a/ci/build_python.sh b/ci/build_python.sh
index e3a2c02..c74c7e2 100755
--- a/ci/build_python.sh
+++ b/ci/build_python.sh
@@ -15,7 +15,11 @@ rapids-print-env
 
 CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
 
-rapids-generate-version > ./VERSION
+# TODO: revert this once we start publishing nightly packages
+# from the 'cugraph-gnn' repo and stop publishing them from
+# the 'cugraph' / 'wholegraph' repos
+# rapids-generate-version > ./VERSION
+echo "24.12.00a1000" > ./VERSION
 
 sccache --zero-stats
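For context, `hasArg` just greps the argument list, so each new flag reduces to an ON/OFF CMake cache variable. A hypothetical sketch of how these variables would typically be passed through; the actual `cmake` invocation sits outside the hunks shown, and the `-D` option names here are assumptions:

```bash
# Hypothetical: how build.sh-style ON/OFF flags usually reach CMake.
cmake -S cpp -B cpp/build \
      -DBUILD_TESTS="${BUILD_TESTS}" \
      -DBUILD_BENCHMARKS="${BUILD_BENCHMARKS}" \
      -DBUILD_WITH_NVSHMEM="${BUILD_WITH_NVSHMEM}"
```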
-name "*.ipynb" | cut -d'/' -f2 | sort -u)" set +e # Always run nbtest in all TOPLEVEL_NB_FOLDERS, set EXITCODE to failure diff --git a/ci/test_python.sh b/ci/test_python.sh index dd5f539..79a90d2 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -27,10 +27,120 @@ RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"} mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" +# RAPIDS_DATASET_ROOT_DIR is used by test scripts +export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" +mkdir -p "${RAPIDS_DATASET_ROOT_DIR}" +pushd "${RAPIDS_DATASET_ROOT_DIR}" +./get_test_data.sh --benchmark +popd + EXITCODE=0 trap "EXITCODE=1" ERR set +e +# Test runs that include tests that use dask require +# --import-mode=append. Those tests start a LocalCUDACluster that inherits +# changes from pytest's modifications to PYTHONPATH (which defaults to +# prepending source tree paths to PYTHONPATH). This causes the +# LocalCUDACluster subprocess to import cugraph from the source tree instead of +# the install location, and in most cases, the source tree does not have +# extensions built in-place and will result in ImportErrors. +# +# FIXME: TEMPORARILY disable MG PropertyGraph tests (experimental) tests and +# bulk sampler IO tests (hangs in CI) + +if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then + rapids-mamba-retry env create --yes -f env.yaml -n test_cugraph_dgl + + # activate test_cugraph_dgl environment for dgl + set +u + conda activate test_cugraph_dgl + set -u + + if [[ "${RAPIDS_CUDA_VERSION%%.*}" == "11" ]]; then + DGL_CHANNEL="dglteam/label/th23_cu118" + else + DGL_CHANNEL="dglteam/label/th23_cu121" + fi + + + # TODO: remove the '>=24.12.00a1000' once we start publishing nightly packages + # from the 'cugraph-gnn' repo and stop publishing them from + # the 'cugraph' / 'wholegraph' repos + rapids-mamba-retry install \ + --channel "${CPP_CHANNEL}" \ + --channel "${PYTHON_CHANNEL}" \ + --channel pytorch \ + --channel conda-forge \ + --channel "${DGL_CHANNEL}" \ + --channel nvidia \ + "pylibwholegraph=${RAPIDS_VERSION},>=24.12.00a1000" \ + "cugraph-dgl=${RAPIDS_VERSION},>=24.12.00a1000" \ + 'pytorch::pytorch>=2.3,<2.4' \ + "ogb" + + rapids-print-env + + rapids-logger "Check GPU usage" + nvidia-smi + + rapids-logger "pytest cugraph_dgl (single GPU)" + ./ci/run_cugraph_dgl_pytests.sh \ + --junitxml="${RAPIDS_TESTS_DIR}/junit-cugraph-dgl.xml" \ + --cov-config=../../.coveragerc \ + --cov=cugraph_dgl \ + --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cugraph-dgl-coverage.xml" \ + --cov-report=term + + # Reactivate the test environment back + set +u + conda deactivate + set -u +else + rapids-logger "skipping cugraph_dgl pytest on ARM64" +fi + +if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then + rapids-mamba-retry env create --yes -f env.yaml -n test_cugraph_pyg + + # Temporarily allow unbound variables for conda activation. 
diff --git a/ci/test_python.sh b/ci/test_python.sh
index dd5f539..79a90d2 100755
--- a/ci/test_python.sh
+++ b/ci/test_python.sh
@@ -27,10 +27,119 @@ RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
 RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"}
 mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}"
 
+# RAPIDS_DATASET_ROOT_DIR is used by test scripts
+export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)"
+mkdir -p "${RAPIDS_DATASET_ROOT_DIR}"
+pushd "${RAPIDS_DATASET_ROOT_DIR}"
+./get_test_data.sh --benchmark
+popd
+
 EXITCODE=0
 trap "EXITCODE=1" ERR
 set +e
 
+# Test runs that include tests that use dask require
+# --import-mode=append. Those tests start a LocalCUDACluster that inherits
+# changes from pytest's modifications to PYTHONPATH (which defaults to
+# prepending source tree paths to PYTHONPATH). This causes the
+# LocalCUDACluster subprocess to import cugraph from the source tree instead of
+# the install location, and in most cases, the source tree does not have
+# extensions built in-place and will result in ImportErrors.
+#
+# FIXME: TEMPORARILY disable MG PropertyGraph (experimental) tests and
+# bulk sampler IO tests (hangs in CI)
+
+if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then
+    rapids-mamba-retry env create --yes -f env.yaml -n test_cugraph_dgl
+
+    # activate test_cugraph_dgl environment for dgl
+    set +u
+    conda activate test_cugraph_dgl
+    set -u
+
+    if [[ "${RAPIDS_CUDA_VERSION%%.*}" == "11" ]]; then
+        DGL_CHANNEL="dglteam/label/th23_cu118"
+    else
+        DGL_CHANNEL="dglteam/label/th23_cu121"
+    fi
+
+    # TODO: remove the '>=24.12.00a1000' once we start publishing nightly packages
+    # from the 'cugraph-gnn' repo and stop publishing them from
+    # the 'cugraph' / 'wholegraph' repos
+    rapids-mamba-retry install \
+      --channel "${CPP_CHANNEL}" \
+      --channel "${PYTHON_CHANNEL}" \
+      --channel pytorch \
+      --channel conda-forge \
+      --channel "${DGL_CHANNEL}" \
+      --channel nvidia \
+      "pylibwholegraph=${RAPIDS_VERSION},>=24.12.00a1000" \
+      "cugraph-dgl=${RAPIDS_VERSION},>=24.12.00a1000" \
+      'pytorch::pytorch>=2.3,<2.4' \
+      "ogb"
+
+    rapids-print-env
+
+    rapids-logger "Check GPU usage"
+    nvidia-smi
+
+    rapids-logger "pytest cugraph_dgl (single GPU)"
+    ./ci/run_cugraph_dgl_pytests.sh \
+      --junitxml="${RAPIDS_TESTS_DIR}/junit-cugraph-dgl.xml" \
+      --cov-config=../../.coveragerc \
+      --cov=cugraph_dgl \
+      --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cugraph-dgl-coverage.xml" \
+      --cov-report=term
+
+    # Deactivate the test environment
+    set +u
+    conda deactivate
+    set -u
+else
+    rapids-logger "skipping cugraph_dgl pytest on ARM64"
+fi
+
+if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then
+    rapids-mamba-retry env create --yes -f env.yaml -n test_cugraph_pyg
+
+    # Temporarily allow unbound variables for conda activation.
+    set +u
+    conda activate test_cugraph_pyg
+    set -u
+
+    # TODO: remove the '>=24.12.00a1000' once we start publishing nightly packages
+    # from the 'cugraph-gnn' repo and stop publishing them from
+    # the 'cugraph' / 'wholegraph' repos
+    rapids-mamba-retry install \
+      --channel "${CPP_CHANNEL}" \
+      --channel "${PYTHON_CHANNEL}" \
+      --channel pytorch \
+      "pylibwholegraph=${RAPIDS_VERSION},>=24.12.00a1000" \
+      "cugraph-pyg=${RAPIDS_VERSION},>=24.12.00a1000" \
+      'pytorch::pytorch>=2.3,<2.4' \
+      'ogb'
+
+    rapids-print-env
+
+    rapids-logger "Check GPU usage"
+    nvidia-smi
+
+    rapids-logger "pytest cugraph_pyg (single GPU)"
+    ./ci/run_cugraph_pyg_pytests.sh \
+      --junitxml="${RAPIDS_TESTS_DIR}/junit-cugraph-pyg.xml" \
+      --cov-config=../../.coveragerc \
+      --cov=cugraph_pyg \
+      --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cugraph-pyg-coverage.xml" \
+      --cov-report=term
+
+    # Deactivate the test environment
+    set +u
+    conda deactivate
+    set -u
+else
+    rapids-logger "skipping cugraph_pyg pytest on ARM64"
+fi
+
 if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then
     rapids-mamba-retry env create --yes -f env.yaml -n test_pylibwholegraph
 
@@ -39,13 +149,15 @@ if [[ "${RUNNER_ARCH}" != "ARM64" ]]; then
     conda activate test_pylibwholegraph
     set -u
 
-    # Will automatically install built dependencies of pylibwholegraph
+    # TODO: remove the '>=24.12.00a1000' once we start publishing nightly packages
+    # from the 'cugraph-gnn' repo and stop publishing them from
+    # the 'cugraph' / 'wholegraph' repos
     rapids-mamba-retry install \
      --channel "${CPP_CHANNEL}" \
      --channel "${PYTHON_CHANNEL}" \
      --channel pytorch \
      'mkl<2024.1.0' \
-     "pylibwholegraph=${RAPIDS_VERSION}" \
+     "pylibwholegraph=${RAPIDS_VERSION},>=24.12.00a1000" \
      'pytorch::pytorch>=2.3,<2.4' \
      'pytest-forked' \
      'ogb'
diff --git a/ci/utils/nbtest.sh b/ci/utils/nbtest.sh
index 91af633..faf7d28 100755
--- a/ci/utils/nbtest.sh
+++ b/ci/utils/nbtest.sh
@@ -60,7 +60,7 @@ for nb in $*; do
     echo --------------------------------------------------------------------------------
     echo STARTING: ${NBNAME}
     echo --------------------------------------------------------------------------------
-    jupyter nbconvert --to script ${NBFILENAME} --output ${NBTMPDIR}/${NBNAME}-test
+    jupyter nbconvert --to python ${NBFILENAME} --output ${NBTMPDIR}/${NBNAME}-test
     echo "${MAGIC_OVERRIDE_CODE}" > ${NBTMPDIR}/tmpfile
     cat ${NBTESTSCRIPT} >> ${NBTMPDIR}/tmpfile
     mv ${NBTMPDIR}/tmpfile ${NBTESTSCRIPT}
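`--to python` and `--to script` generally produce the same output for a Python notebook; `script` picks the exporter from the notebook's kernel metadata, while `python` requests the Python exporter explicitly. A minimal stand-alone reproduction of the harness's convert-then-run step; the notebook name and temp dir are hypothetical:

```bash
NB="demo.ipynb"           # hypothetical notebook
NBTMPDIR="$(mktemp -d)"
# Export the notebook to a plain .py script, as nbtest.sh does...
jupyter nbconvert --to python "${NB}" --output "${NBTMPDIR}/demo-test"
# ...then execute it; a non-zero exit marks the notebook as failing.
python "${NBTMPDIR}/demo-test.py"
```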
 dependencies:
 - sphinx<6
 - sphinxcontrib-websupport
 - torchdata
-- wget
 - wheel
 name: all_cuda-121_arch-x86_64
diff --git a/conda/environments/all_cuda-124_arch-x86_64.yaml b/conda/environments/all_cuda-124_arch-x86_64.yaml
index 47a7a9b..30a259e 100644
--- a/conda/environments/all_cuda-124_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-124_arch-x86_64.yaml
@@ -26,6 +26,7 @@ dependencies:
 - libcurand-dev
 - libcusolver-dev
 - libcusparse-dev
+- nbconvert
 - nbsphinx
 - nccl>=2.19
 - ninja
@@ -33,6 +34,7 @@ dependencies:
 - numba>=0.57
 - numpy>=1.23,<3.0a0
 - numpydoc
+- ogb
 - pandas
 - pre-commit
 - pydantic
@@ -59,6 +61,5 @@ dependencies:
 - sphinx<6
 - sphinxcontrib-websupport
 - torchdata
-- wget
 - wheel
 name: all_cuda-124_arch-x86_64
diff --git a/conda/recipes/cugraph-dgl/meta.yaml b/conda/recipes/cugraph-dgl/meta.yaml
index 34e826e..16bbe24 100644
--- a/conda/recipes/cugraph-dgl/meta.yaml
+++ b/conda/recipes/cugraph-dgl/meta.yaml
@@ -24,7 +24,7 @@ requirements:
     - rapids-build-backend>=0.3.1,<0.4.0.dev0
     - setuptools>=61.0.0
   run:
-    - cugraph ={{ version }}
+    - cugraph ={{ minor_version }}
     - dgl >=2.4.0.th23.cu*
     - numba >=0.57
     - numpy >=1.23,<3.0a0
diff --git a/conda/recipes/cugraph-pyg/meta.yaml b/conda/recipes/cugraph-pyg/meta.yaml
index 10122f6..9543209 100644
--- a/conda/recipes/cugraph-pyg/meta.yaml
+++ b/conda/recipes/cugraph-pyg/meta.yaml
@@ -34,7 +34,7 @@ requirements:
     - python
     - pytorch >=2.3,<2.4.0a0
     - cupy >=12.0.0
-    - cugraph ={{ version }}
+    - cugraph ={{ minor_version }}
     - pylibcugraphops ={{ minor_version }}
     - tensordict >=0.1.2
     - pytorch_geometric >=2.5,<2.6
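The `{{ version }}` → `{{ minor_version }}` change loosens the run pin from an exact build to the release series, which matters while nightlies carry the hard-coded `24.12.00a1000` version. Rendered, the two specs behave like this (values taken from the pins above; shown as an illustration only):

```bash
version="24.12.00a1000"            # full per-build version string
minor_version="24.12"              # release series only
echo "cugraph =${version}"         # conda spec matching only that exact nightly
echo "cugraph =${minor_version}"   # conda spec matching any 24.12.* build
```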
+CPP_CI_DATASET_DATA=" +# ~10s download +https://data.rapids.ai/cugraph/test/cpp_ci_datasets.tgz +test +" + +BASE_DATASET_DATA=" +# ~22s download +https://data.rapids.ai/cugraph/test/datasets.tgz +test + +# ~14s download +https://data.rapids.ai/cugraph/test/ref/pagerank.tgz +test/ref + +# ~1s download +https://data.rapids.ai/cugraph/test/ref/sssp.tgz +test/ref + +# ~15s download +https://data.rapids.ai/cugraph/benchmark/hibench/hibench_1_large.tgz +benchmark + +# ~1s download +https://data.rapids.ai/cugraph/benchmark/hibench/hibench_1_small.tgz +benchmark + +# ~0.6s download +https://data.rapids.ai/cugraph/test/tsplib/datasets.tar.gz +tsplib +" + +EXTENDED_DATASET_DATA=" +# ~42s download - tests using this dataset are currently not run in test.sh with --quick +https://data.rapids.ai/cugraph/benchmark/hibench/hibench_1_huge.tgz +benchmark +" + +BENCHMARK_DATASET_DATA=" +# ~90s download - these are used for benchmarks runs (code in /benchmarks) +https://data.rapids.ai/cugraph/benchmark/benchmark_csv_data.tgz +csv +" + +SELF_LOOPS_DATASET_DATA=" +# ~1s download +https://data.rapids.ai/cugraph/benchmark/benchmark_csv_data_self_loops.tgz +self_loops +" +################################################################################ +# Do not change the script below this line if only adding/updating a dataset + +NUMARGS=$# +ARGS=$* +function hasArg { + (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ") +} + +if hasArg -h || hasArg --help; then + echo "$0 [--subset | --benchmark | --self_loops]" + exit 0 +fi + +# Select the datasets to install +if hasArg "--benchmark"; then + DATASET_DATA="${BENCHMARK_DATASET_DATA}" +elif hasArg "--subset"; then + DATASET_DATA="${BASE_DATASET_DATA}" +elif hasArg "--cpp_ci_subset"; then + DATASET_DATA="${CPP_CI_DATASET_DATA}" +elif hasArg "--self_loops"; then + DATASET_DATA="${SELF_LOOPS_DATASET_DATA}" +# Do not include benchmark datasets by default - too big +else + DATASET_DATA="${BASE_DATASET_DATA} ${EXTENDED_DATASET_DATA}" +fi + +URLS=($(echo "$DATASET_DATA"|awk '{if (NR%4 == 3) print $0}')) # extract 3rd fields to a bash array +DESTDIRS=($(echo "$DATASET_DATA"|awk '{if (NR%4 == 0) print $0}')) # extract 4th fields to a bash array + +echo Downloading ... + +# Download all tarfiles to a tmp dir +mkdir -p tmp +cd tmp +for url in ${URLS[*]}; do + time wget -N --progress=dot:giga ${url} +done +cd .. + +# create the destination dirs +mkdir -p "${DESTDIRS[@]}" + +# Iterate over the arrays and untar the nth tarfile to the nth dest directory. +# The tarfile name is derived from the download url. +echo Decompressing ... 
diff --git a/dependencies.yaml b/dependencies.yaml
index 37b9262..1787b05 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -55,15 +55,16 @@ files:
     output: none
     includes:
       - cuda_version
+      - depends_on_pytorch
       - py_version
       - test_notebook
-      - test_python_common
   test_python:
     output: none
     includes:
       - cuda_version
       - depends_on_cugraph
       - depends_on_cudf
+      - depends_on_dgl
       - depends_on_pytorch
       - py_version
       - test_python_common
@@ -348,10 +349,9 @@ dependencies:
     - output_types: [conda, requirements]
       packages:
         - ipython
+        - nbconvert
         - notebook>=0.5.0
-    - output_types: [conda]
-      packages:
-        - wget
+        - ogb
   test_python_common:
     common:
diff --git a/python/cugraph-dgl/examples/dataset_from_disk_cudf.ipynb b/python/cugraph-dgl/examples/dataset_from_disk_cudf.ipynb
index 15708f5..66ee205 100644
--- a/python/cugraph-dgl/examples/dataset_from_disk_cudf.ipynb
+++ b/python/cugraph-dgl/examples/dataset_from_disk_cudf.ipynb
@@ -18,12 +18,11 @@
    "outputs": [],
    "source": [
     "import os\n",
-    "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"4\"\n",
-    "import cudf\n",
+    "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\"\n",
     "import rmm\n",
     "import torch\n",
     "from rmm.allocators.torch import rmm_torch_allocator\n",
-    "rmm.reinitialize(initial_pool_size=15e9)\n",
+    "rmm.reinitialize(pool_allocator=True, initial_pool_size=\"14GiB\")\n",
     "#Switch to async pool in case of memory issues due to fragmentation of the pool\n",
     "#rmm.mr.set_current_device_resource(rmm.mr.CudaAsyncMemoryResource(initial_pool_size=15e9))\n",
     "torch.cuda.memory.change_current_allocator(rmm_torch_allocator)"
@@ -52,8 +51,10 @@
    "source": [
     "def load_dgl_dataset(dataset_name='ogbn-products'):\n",
     "    from ogb.nodeproppred import DglNodePropPredDataset\n",
-    "    dataset_root = '/raid/vjawa/gnn/'\n",
-    "    dataset = DglNodePropPredDataset(name = dataset_name, root=dataset_root)\n",
+    "    from unittest.mock import patch\n",
+    "    dataset_root = '/tmp/'\n",
+    "    with patch(\"builtins.input\", return_value=\"y\"):\n",
+    "        dataset = DglNodePropPredDataset(name = dataset_name, root=dataset_root)\n",
     "    split_idx = dataset.get_idx_split()\n",
     "    train_idx, valid_idx, test_idx = split_idx[\"train\"], split_idx[\"valid\"], split_idx[\"test\"]\n",
     "    g, label = dataset[0]\n",
@@ -80,8 +81,7 @@
    },
    "outputs": [],
    "source": [
-    "import cugraph_dgl\n",
-    "import tempfile"
+    "import cugraph_dgl"
    ]
   },
   {
@@ -113,7 +113,7 @@
     "    g, \n",
     "    train_idx.to('cuda'), # train_nid must be on GPU.\n",
     "    sampler,\n",
-    "    sampling_output_dir=\"/raid/vjawa/obgn_products_sampling/\", # Path to save sampling results to, Change to the fastest IO path available\n",
+    "    sampling_output_dir=\"/tmp/\", # Path to save sampling results to, Change to the fastest IO path available\n",
     "    device=torch.device('cuda'), # The device argument must be GPU.\n",
     "    num_workers=0, # Number of workers must be 0.\n",
     "    batch_size=batch_size,\n",
@@ -256,7 +256,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.12.7"
   },
   "vscode": {
    "interpreter": {