diff --git a/datasets/README.md b/datasets/README.md
index e42413fc996..a23dc644081 100644
--- a/datasets/README.md
+++ b/datasets/README.md
@@ -120,9 +120,13 @@ The benchmark datasets are described below:
 | soc-twitter-2010 | 21,297,772 | 265,025,809 | No | No |
 
 **cit-Patents** : A citation graph that includes all citations made by patents granted between 1975 and 1999, totaling 16,522,438 citations.
+**soc-LiveJournal** : A graph of the LiveJournal social network.
+**europe_osm** : A graph of OpenStreetMap data for Europe.
+**hollywood** : A graph of movie actors where vertices are actors, and two actors are joined by an edge whenever they appeared in a movie together.
+**soc-twitter-2010** : A network of follower relationships from a snapshot of Twitter in 2010, where an edge from i to j indicates that j is a follower of i.
 
 _NOTE: the benchmark datasets were converted to a CSV format from their original format described in the reference URL below, and in doing so had edge weights and isolated vertices discarded._
diff --git a/python/cugraph/cugraph/datasets/__init__.py b/python/cugraph/cugraph/datasets/__init__.py
index 65a820f108b..ac18274d354 100644
--- a/python/cugraph/cugraph/datasets/__init__.py
+++ b/python/cugraph/cugraph/datasets/__init__.py
@@ -39,3 +39,13 @@
 small_tree = Dataset(meta_path / "small_tree.yaml")
 toy_graph = Dataset(meta_path / "toy_graph.yaml")
 toy_graph_undirected = Dataset(meta_path / "toy_graph_undirected.yaml")
+
+# Benchmarking datasets: be mindful of memory usage
+# 250 MB
+soc_livejournal = Dataset(meta_path / "soc-livejournal1.yaml")
+# 965 MB
+cit_patents = Dataset(meta_path / "cit-patents.yaml")
+# 1.8 GB
+europe_osm = Dataset(meta_path / "europe_osm.yaml")
+# 1.5 GB
+hollywood = Dataset(meta_path / "hollywood.yaml")
diff --git a/python/cugraph/cugraph/datasets/dataset.py b/python/cugraph/cugraph/datasets/dataset.py
index 877eade7708..b36042606e7 100644
--- a/python/cugraph/cugraph/datasets/dataset.py
+++ b/python/cugraph/cugraph/datasets/dataset.py
@@ -26,10 +26,22 @@ class DefaultDownloadDir:
     a single object.
     """
 
-    def __init__(self):
-        self._path = Path(
-            os.environ.get("RAPIDS_DATASET_ROOT_DIR", Path.home() / ".cugraph/datasets")
-        )
+    def __init__(self, path_modifier=None):
+        base_path = Path(
+            os.environ.get(
+                "RAPIDS_DATASET_ROOT_DIR", Path.home() / ".cugraph/datasets"
+            )
+        )
+        # Append the optional modifier rather than duplicating the base-path
+        # lookup in two branches
+        self._path = base_path / path_modifier if path_modifier else base_path
 
     @property
     def path(self):
@@ -53,6 +65,23 @@ def path(self, new):
     def clear(self):
         self._path = None
 
+    def set_download_dir(self, path):
+        """
+        Set the download location for datasets
+
+        Parameters
+        ----------
+        path : String
+            Location used to store datafiles
+        """
+        if path is None:
+            self.clear()
+        else:
+            self._path = path
+
+    def get_download_dir(self):
+        # go through the `path` property rather than the raw attribute
+        return self.path.absolute()
+
 
 default_download_dir = DefaultDownloadDir()
 
@@ -159,7 +188,7 @@ def unload(self):
         """
         self._edgelist = None
 
-    def get_edgelist(self, download=False):
+    def get_edgelist(self, download=False, create_using=cudf):
         """
         Return an Edgelist
 
@@ -168,6 +197,10 @@
         download : Boolean (default=False)
             Automatically download the dataset from the 'url' location within
             the YAML file.
+
+        create_using : module (default=cudf)
+            Specify which module to use when reading the dataset. This module
+            must have a read_csv function.
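+
+        Examples
+        --------
+        A minimal usage sketch (assumes the bundled karate dataset; a module
+        such as pandas that exposes read_csv may be passed instead of cudf):
+
+        >>> import cudf
+        >>> from cugraph.datasets import karate
+        >>> df = karate.get_edgelist(download=True, create_using=cudf)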
""" if self._edgelist is None: full_path = self.get_path() @@ -183,7 +216,15 @@ def get_edgelist(self, download=False): header = None if isinstance(self.metadata["header"], int): header = self.metadata["header"] - self._edgelist = cudf.read_csv( + if create_using is None: + reader = cudf + elif str(type(create_using)) != "": + raise RuntimeError("create_using must be a module.") + elif create_using.__name__ == "cudf" or "pandas": + reader = create_using + else: + raise NotImplementedError() + self._edgelist = reader.read_csv( full_path, delimiter=self.metadata["delim"], names=self.metadata["col_names"], @@ -219,6 +260,10 @@ def get_graph( dataset -if present- will be applied to the Graph. If the dataset does not contain weights, the Graph returned will be unweighted regardless of ignore_weights. + + store_transposed: Boolean (default=False) + If True, stores the transpose of the adjacency matrix. Required + for certain algorithms, such as pagerank. """ if self._edgelist is None: self.get_edgelist(download) @@ -237,20 +282,19 @@ def get_graph( "(or subclass) type or instance, got: " f"{type(create_using)}" ) - if len(self.metadata["col_names"]) > 2 and not (ignore_weights): G.from_cudf_edgelist( self._edgelist, - source="src", - destination="dst", - edge_attr="wgt", + source=self.metadata["col_names"][0], + destination=self.metadata["col_names"][1], + edge_attr=self.metadata["col_names"][2], store_transposed=store_transposed, ) else: G.from_cudf_edgelist( self._edgelist, - source="src", - destination="dst", + source=self.metadata["col_names"][0], + destination=self.metadata["col_names"][1], store_transposed=store_transposed, ) return G @@ -331,7 +375,7 @@ def download_all(force=False): def set_download_dir(path): """ - Set the download location fors datasets + Set the download location for datasets Parameters ---------- diff --git a/python/cugraph/cugraph/datasets/metadata/cit-patents.yaml b/python/cugraph/cugraph/datasets/metadata/cit-patents.yaml new file mode 100644 index 00000000000..d5c4cf195bd --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/cit-patents.yaml @@ -0,0 +1,22 @@ +name: cit-Patents +file_type: .csv +description: A citation graph that includes all citations made by patents granted between 1975 and 1999, totaling 16,522,438 citations. +author: NBER +refs: + J. Leskovec, J. Kleinberg and C. Faloutsos. Graphs over Time Densification Laws, Shrinking Diameters and Possible Explanations. + ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD), 2005. +delim: " " +header: None +col_names: + - src + - dst +col_types: + - int32 + - int32 +has_loop: true +is_directed: true +is_multigraph: false +is_symmetric: false +number_of_edges: 16518948 +number_of_nodes: 3774768 +url: https://data.rapids.ai/cugraph/datasets/cit-Patents.csv \ No newline at end of file diff --git a/python/cugraph/cugraph/datasets/metadata/europe_osm.yaml b/python/cugraph/cugraph/datasets/metadata/europe_osm.yaml new file mode 100644 index 00000000000..fe0e42a4b86 --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/europe_osm.yaml @@ -0,0 +1,21 @@ +name: europe_osm +file_type: .csv +description: A graph of OpenStreetMap data for Europe. +author: M. Kobitzsh / Geofabrik GmbH +refs: + Rossi, Ryan. Ahmed, Nesreen. The Network Data Respoistory with Interactive Graph Analytics and Visualization. 
+delim: " " +header: None +col_names: + - src + - dst +col_types: + - int32 + - int32 +has_loop: false +is_directed: false +is_multigraph: false +is_symmetric: true +number_of_edges: 54054660 +number_of_nodes: 50912018 +url: https://data.rapids.ai/cugraph/datasets/europe_osm.csv \ No newline at end of file diff --git a/python/cugraph/cugraph/datasets/metadata/hollywood.yaml b/python/cugraph/cugraph/datasets/metadata/hollywood.yaml new file mode 100644 index 00000000000..2f09cf7679b --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/hollywood.yaml @@ -0,0 +1,26 @@ +name: hollywood +file_type: .csv +description: + A graph of movie actors where vertices are actors, and two actors are + joined by an edge whenever they appeared in a movie together. +author: Laboratory for Web Algorithmics (LAW) +refs: + The WebGraph Framework I Compression Techniques, Paolo Boldi + and Sebastiano Vigna, Proc. of the Thirteenth International + World Wide Web Conference (WWW 2004), 2004, Manhattan, USA, + pp. 595--601, ACM Press. +delim: " " +header: None +col_names: + - src + - dst +col_types: + - int32 + - int32 +has_loop: false +is_directed: false +is_multigraph: false +is_symmetric: true +number_of_edges: 57515616 +number_of_nodes: 1139905 +url: https://data.rapids.ai/cugraph/datasets/hollywood.csv \ No newline at end of file diff --git a/python/cugraph/cugraph/datasets/metadata/soc-livejournal1.yaml b/python/cugraph/cugraph/datasets/metadata/soc-livejournal1.yaml new file mode 100644 index 00000000000..fafc68acb9b --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/soc-livejournal1.yaml @@ -0,0 +1,22 @@ +name: soc-LiveJournal1 +file_type: .csv +description: A graph of the LiveJournal social network. +author: L. Backstrom, D. Huttenlocher, J. Kleinberg, X. Lan +refs: + L. Backstrom, D. Huttenlocher, J. Kleinberg, X. Lan. Group Formation in + Large Social Networks Membership, Growth, and Evolution. KDD, 2006. +delim: " " +header: None +col_names: + - src + - dst +col_types: + - int32 + - int32 +has_loop: true +is_directed: true +is_multigraph: false +is_symmetric: false +number_of_edges: 68993773 +number_of_nodes: 4847571 +url: https://data.rapids.ai/cugraph/datasets/soc-LiveJournal1.csv \ No newline at end of file diff --git a/python/cugraph/cugraph/datasets/metadata/soc-twitter-2010.yaml b/python/cugraph/cugraph/datasets/metadata/soc-twitter-2010.yaml new file mode 100644 index 00000000000..df5df5735af --- /dev/null +++ b/python/cugraph/cugraph/datasets/metadata/soc-twitter-2010.yaml @@ -0,0 +1,22 @@ +name: soc-twitter-2010 +file_type: .csv +description: A network of follower relationships from a snapshot of Twitter in 2010, where an edge from i to j indicates that j is a follower of i. +author: H. Kwak, C. Lee, H. Park, S. Moon +refs: + J. Yang, J. Leskovec. Temporal Variation in Online Media. ACM Intl. + Conf. on Web Search and Data Mining (WSDM '11), 2011. 
+delim: " " +header: None +col_names: + - src + - dst +col_types: + - int32 + - int32 +has_loop: false +is_directed: false +is_multigraph: false +is_symmetric: false +number_of_edges: 530051354 +number_of_nodes: 21297772 +url: https://data.rapids.ai/cugraph/datasets/soc-twitter-2010.csv \ No newline at end of file diff --git a/python/cugraph/cugraph/testing/__init__.py b/python/cugraph/cugraph/testing/__init__.py index f5f0bcb06eb..2b4a4fd3ebf 100644 --- a/python/cugraph/cugraph/testing/__init__.py +++ b/python/cugraph/cugraph/testing/__init__.py @@ -19,7 +19,7 @@ Resultset, load_resultset, get_resultset, - results_dir_path, + default_resultset_download_dir, ) from cugraph.datasets import ( cyber, @@ -34,6 +34,11 @@ email_Eu_core, toy_graph, toy_graph_undirected, + soc_livejournal, + cit_patents, + europe_osm, + hollywood, + # twitter, ) # @@ -66,3 +71,4 @@ toy_graph_undirected, ] DEFAULT_DATASETS = [dolphins, netscience, karate_disjoint] +BENCHMARKING_DATASETS = [soc_livejournal, cit_patents, europe_osm, hollywood] diff --git a/python/cugraph/cugraph/testing/generate_resultsets.py b/python/cugraph/cugraph/testing/generate_resultsets.py index 9724aca32dc..ec93e445a85 100644 --- a/python/cugraph/cugraph/testing/generate_resultsets.py +++ b/python/cugraph/cugraph/testing/generate_resultsets.py @@ -20,8 +20,14 @@ import cudf import cugraph from cugraph.datasets import dolphins, netscience, karate_disjoint, karate -from cugraph.testing import utils, Resultset, SMALL_DATASETS, results_dir_path +# from cugraph.testing import utils, Resultset, SMALL_DATASETS, results_dir_path +from cugraph.testing import ( + utils, + Resultset, + SMALL_DATASETS, + default_resultset_download_dir, +) _resultsets = {} @@ -224,6 +230,7 @@ def add_resultset(result_data_dictionary, **kwargs): ] ) # Generating ALL results files + results_dir_path = default_resultset_download_dir.get_download_dir() if not results_dir_path.exists(): results_dir_path.mkdir(parents=True, exist_ok=True) diff --git a/python/cugraph/cugraph/testing/resultset.py b/python/cugraph/cugraph/testing/resultset.py index 490e3a7c4ff..a6b3a6d2ca1 100644 --- a/python/cugraph/cugraph/testing/resultset.py +++ b/python/cugraph/cugraph/testing/resultset.py @@ -16,10 +16,12 @@ import urllib.request import cudf -from cugraph.testing import utils +from cugraph.datasets.dataset import ( + DefaultDownloadDir, + default_download_dir, +) - -results_dir_path = utils.RAPIDS_DATASET_ROOT_DIR_PATH / "tests" / "resultsets" +# results_dir_path = utils.RAPIDS_DATASET_ROOT_DIR_PATH / "tests" / "resultsets" class Resultset: @@ -48,14 +50,12 @@ def get_cudf_dataframe(self): _resultsets = {} -def load_resultset(resultset_name, resultset_download_url): - """ - Read a mapping file (.csv) in the _results_dir and save the - mappings between each unique set of args/identifiers to UUIDs to the - _resultsets dictionary. If .csv does not exist in - _results_dir, use resultset_download_url to download a file to - install/unpack/etc. to _results_dir first. - """ +"""def load_resultset(resultset_name, resultset_download_url): + #Read a mapping file (.csv) in the _results_dir and save the + #mappings between each unique set of args/identifiers to UUIDs to the + #_resultsets dictionary. If .csv does not exist in + #_results_dir, use resultset_download_url to download a file to + #install/unpack/etc. to _results_dir first. 
     mapping_file_path = results_dir_path / (resultset_name + "_mappings.csv")
     if not mapping_file_path.exists():
         # Downloads a tar gz from s3 bucket, then unpacks the results files
@@ -101,7 +101,7 @@ def load_resultset(resultset_name, resultset_download_url):
         ]
     )
 
-    _resultsets[resultset_key] = uuid
+    _resultsets[resultset_key] = uuid"""
 
 
 def get_resultset(resultset_name, **kwargs):
@@ -132,5 +132,83 @@
     if uuid is None:
         raise KeyError(f"results for {arg_dict} not found")
 
+    results_dir_path = default_resultset_download_dir.get_download_dir()
     results_filename = results_dir_path / (uuid + ".csv")
     return cudf.read_csv(results_filename)
+
+
+# This replaces the module-level results_dir_path that was formerly defined
+# at the top of this file.
+default_resultset_download_dir = DefaultDownloadDir("tests/resultsets")
+
+
+# Left in case we don't want to move set_download_dir and get_download_dir into
+# DefaultDownloadDir.
+"""def set_resultset_download_dir(path):
+    if path is None:
+        default_resultset_download_dir.clear()
+    else:
+        default_resultset_download_dir.path = path
+
+
+def get_resultset_download_dir():
+    return default_resultset_download_dir.path.absolute()"""
+
+
+def load_resultset(resultset_name, resultset_download_url):
+    """
+    Read a mapping file (.csv) in the _results_dir and save the
+    mappings between each unique set of args/identifiers to UUIDs to the
+    _resultsets dictionary. If .csv does not exist in
+    _results_dir, use resultset_download_url to download a file to
+    install/unpack/etc. to _results_dir first.
+    """
+    # curr_resultset_download_dir = get_resultset_download_dir()
+    curr_resultset_download_dir = default_resultset_download_dir.get_download_dir()
+    # curr_download_dir = get_download_dir()
+    curr_download_dir = default_download_dir.get_download_dir()
+    mapping_file_path = curr_resultset_download_dir / (resultset_name + "_mappings.csv")
+    if not mapping_file_path.exists():
+        # Downloads a tar gz from s3 bucket, then unpacks the results files
+        compressed_file_dir = curr_download_dir / "tests"
+        compressed_file_path = compressed_file_dir / "resultsets.tar.gz"
+        if not curr_resultset_download_dir.exists():
+            curr_resultset_download_dir.mkdir(parents=True, exist_ok=True)
+        if not compressed_file_path.exists():
+            urllib.request.urlretrieve(resultset_download_url, compressed_file_path)
+        tar = tarfile.open(str(compressed_file_path), "r:gz")
+        tar.extractall(str(curr_resultset_download_dir))
+        tar.close()
+
+    # FIXME: This assumes separator is " ", but should this be configurable?
+    sep = " "
+    with open(mapping_file_path) as mapping_file:
+        for line in mapping_file.readlines():
+            if line.startswith("#"):
+                continue
+
+            (uuid, *row_args) = line.split(sep)
+            if (len(row_args) % 2) != 0:
+                raise ValueError(
+                    f'bad row in {mapping_file_path}: "{line}", must '
+                    "contain UUID followed by an even number of items"
+                )
+            row_keys = row_args[::2]
+            row_vals = row_args[1::2]
+            row_keys = " ".join(row_keys).split()
+            row_vals = " ".join(row_vals).split()
+            arg_dict = dict(zip(row_keys, row_vals))
+            arg_dict["resultset_name"] = resultset_name
+            # Create a unique string key for the _resultsets dict based on
+            # sorted row_keys. Looking up results based on args will also have
+            # to sort, but this will ensure results can be looked up without
+            # requiring a specific order. Example:
+            # {'a': 1, 'z': 9, 'c': 5, 'b': 2} becomes 'a-1-b-2-c-5-z-9'
+            resultset_key = "-".join(
+                [
+                    str(val)
+                    for arg_dict_pair in sorted(arg_dict.items())
+                    for val in arg_dict_pair
+                ]
+            )
+
+            _resultsets[resultset_key] = uuid
diff --git a/python/cugraph/cugraph/tests/utils/test_dataset.py b/python/cugraph/cugraph/tests/utils/test_dataset.py
index c2a4f7c6072..26c295c9352 100644
--- a/python/cugraph/cugraph/tests/utils/test_dataset.py
+++ b/python/cugraph/cugraph/tests/utils/test_dataset.py
@@ -27,6 +27,7 @@
     ALL_DATASETS,
     WEIGHTED_DATASETS,
     SMALL_DATASETS,
+    BENCHMARKING_DATASETS,
 )
 from cugraph import datasets
 
@@ -94,7 +95,10 @@ def setup_deprecation_warning_tests():
 # Helpers
 
 # check if there is a row where src == dst
-def has_loop(df):
+def has_selfloop(dataset):
+    # undirected datasets are assumed to have no self-loops
+    if not dataset.metadata["is_directed"]:
+        return False
+    df = dataset.get_edgelist(download=True)
     df.rename(columns={df.columns[0]: "src", df.columns[1]: "dst"}, inplace=True)
     res = df.where(df["src"] == df["dst"])
 
@@ -109,7 +113,13 @@ def is_symmetric(dataset):
     else:
         df = dataset.get_edgelist(download=True)
         df_a = df.sort_values("src")
-        df_b = df_a[["dst", "src", "wgt"]]
+
+        # create df with swapped src/dst columns
+        if "wgt" in df_a.columns:
+            df_b = df_a[["dst", "src", "wgt"]]
+        else:
+            df_b = df_a[["dst", "src"]]
         df_b.rename(columns={"dst": "src", "src": "dst"}, inplace=True)
         # created a df by appending the two
         res = cudf.concat([df_a, df_b])
@@ -172,7 +182,6 @@ def test_get_graph(dataset):
 @pytest.mark.parametrize("dataset", ALL_DATASETS)
 def test_metadata(dataset):
     M = dataset.metadata
-
     assert M is not None
 
@@ -310,10 +319,8 @@ def test_is_directed(dataset):
 
 @pytest.mark.parametrize("dataset", ALL_DATASETS)
-def test_has_loop(dataset):
-    df = dataset.get_edgelist(download=True)
-
-    assert has_loop(df) == dataset.metadata["has_loop"]
+def test_has_selfloop(dataset):
+    assert has_selfloop(dataset) == dataset.metadata["has_loop"]
 
 
 @pytest.mark.parametrize("dataset", ALL_DATASETS)
@@ -328,6 +335,25 @@ def test_is_multigraph(dataset):
     assert G.is_multigraph() == dataset.metadata["is_multigraph"]
 
 
+# The benchmark datasets are tested in their own test, since downloading them
+# for every parametrized test would add significant overhead
+@pytest.mark.parametrize("dataset", BENCHMARKING_DATASETS)
+def test_benchmarking_datasets(dataset):
+    dataset_is_directed = dataset.metadata["is_directed"]
+    G = dataset.get_graph(
+        download=True, create_using=Graph(directed=dataset_is_directed)
+    )
+
+    assert G.is_directed() == dataset.metadata["is_directed"]
+    assert G.number_of_nodes() == dataset.metadata["number_of_nodes"]
+    assert G.number_of_edges() == dataset.metadata["number_of_edges"]
+    assert has_selfloop(dataset) == dataset.metadata["has_loop"]
+    assert is_symmetric(dataset) == dataset.metadata["is_symmetric"]
+    assert G.is_multigraph() == dataset.metadata["is_multigraph"]
+
+    dataset.unload()
+
+
 @pytest.mark.parametrize("dataset", ALL_DATASETS)
 def test_object_getters(dataset):
     assert dataset.is_directed() == dataset.metadata["is_directed"]
diff --git a/python/cugraph/cugraph/tests/utils/test_resultset.py b/python/cugraph/cugraph/tests/utils/test_resultset.py
new file mode 100644
index 00000000000..3e685c3e905
--- /dev/null
+++ b/python/cugraph/cugraph/tests/utils/test_resultset.py
@@ -0,0 +1,79 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+from pathlib import Path
+from tempfile import TemporaryDirectory
+
+import cudf
+from cugraph.datasets.dataset import (
+    set_download_dir,
+    get_download_dir,
+)
+from cugraph.testing.resultset import load_resultset, default_resultset_download_dir
+
+###############################################################################
+
+
+def test_load_resultset():
+    with TemporaryDirectory() as tmpd:
+        set_download_dir(Path(tmpd))
+        default_resultset_download_dir.set_download_dir(
+            Path(tmpd) / "tests" / "resultsets"
+        )
+        default_resultset_download_dir.get_download_dir().mkdir(
+            parents=True, exist_ok=True
+        )
+
+        datasets_download_dir = get_download_dir()
+        resultsets_download_dir = default_resultset_download_dir.get_download_dir()
+        assert "tests" in os.listdir(datasets_download_dir)
+        assert "resultsets.tar.gz" not in os.listdir(datasets_download_dir / "tests")
+        assert "traversal_mappings.csv" not in os.listdir(resultsets_download_dir)
+
+        load_resultset(
+            "traversal", "https://data.rapids.ai/cugraph/results/resultsets.tar.gz"
+        )
+
+        assert "resultsets.tar.gz" in os.listdir(datasets_download_dir / "tests")
+        assert "traversal_mappings.csv" in os.listdir(resultsets_download_dir)
+
+
+def test_verify_resultset_load():
+    # This test is more detailed than test_load_resultset: for each module, it
+    # checks that every resultset file is present along with the corresponding
+    # mapping file.
+    with TemporaryDirectory() as tmpd:
+        set_download_dir(Path(tmpd))
+        default_resultset_download_dir.set_download_dir(
+            Path(tmpd) / "tests" / "resultsets"
+        )
+        default_resultset_download_dir.get_download_dir().mkdir(
+            parents=True, exist_ok=True
+        )
+
+        resultsets_download_dir = default_resultset_download_dir.get_download_dir()
+
+        load_resultset(
+            "traversal", "https://data.rapids.ai/cugraph/results/resultsets.tar.gz"
+        )
+
+        resultsets = os.listdir(resultsets_download_dir)
+        downloaded_results = cudf.read_csv(
+            resultsets_download_dir / "traversal_mappings.csv", sep=" "
+        )
+        downloaded_uuids = downloaded_results["#UUID"].values
+        for resultset_uuid in downloaded_uuids:
+            assert str(resultset_uuid) + ".csv" in resultsets
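+
+
+def test_default_download_dir_path_modifier():
+    # A minimal additional sketch (not part of the original test plan): it
+    # exercises the new DefaultDownloadDir path_modifier argument added in
+    # dataset.py, and assumes RAPIDS_DATASET_ROOT_DIR is read as the base
+    # path when the object is constructed.
+    from cugraph.datasets.dataset import DefaultDownloadDir
+
+    with TemporaryDirectory() as tmpd:
+        prev = os.environ.get("RAPIDS_DATASET_ROOT_DIR")
+        os.environ["RAPIDS_DATASET_ROOT_DIR"] = tmpd
+        try:
+            ddd = DefaultDownloadDir(path_modifier="tests/resultsets")
+            expected = (Path(tmpd) / "tests" / "resultsets").absolute()
+            assert ddd.get_download_dir() == expected
+        finally:
+            # restore the environment so other tests see the original value
+            if prev is None:
+                del os.environ["RAPIDS_DATASET_ROOT_DIR"]
+            else:
+                os.environ["RAPIDS_DATASET_ROOT_DIR"] = prev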