diff --git a/doc/source/user_guide/documentation/classes_dev_uml.svg b/doc/source/user_guide/documentation/classes_dev_uml.svg index 09c112f5c..84fb2a450 100644 --- a/doc/source/user_guide/documentation/classes_dev_uml.svg +++ b/doc/source/user_guide/documentation/classes_dev_uml.svg @@ -235,7 +235,7 @@ Read -_filelist : list +_filelist _out_obj : Dataset _product _read_vars diff --git a/doc/source/user_guide/documentation/components.rst b/doc/source/user_guide/documentation/components.rst index 76ffcbb2d..7d81c190c 100644 --- a/doc/source/user_guide/documentation/components.rst +++ b/doc/source/user_guide/documentation/components.rst @@ -27,14 +27,6 @@ granules :undoc-members: :show-inheritance: -is2cat ------- - -.. automodule:: icepyx.core.is2cat - :members: - :undoc-members: - :show-inheritance: - is2ref ------ diff --git a/doc/source/user_guide/documentation/query.rst b/doc/source/user_guide/documentation/query.rst index 804de27e5..df82aa35b 100644 --- a/doc/source/user_guide/documentation/query.rst +++ b/doc/source/user_guide/documentation/query.rst @@ -23,7 +23,6 @@ Attributes Query.cycles Query.dates Query.end_time - Query.file_vars Query.granules Query.order_vars Query.product diff --git a/icepyx/core/is2ref.py b/icepyx/core/is2ref.py index c51c631be..66ceaec1c 100644 --- a/icepyx/core/is2ref.py +++ b/icepyx/core/is2ref.py @@ -378,8 +378,10 @@ def extract_product(filepath, auth=None): # ATL14 saves the short_name as an array ['ATL14'] product = product[0] product = _validate_product(product) - except KeyError: - raise "Unable to parse the product name from file metadata" + except KeyError as e: + raise Exception( + "Unable to parse the product name from file metadata" + ).with_traceback(e.__traceback__) # Close the file reader f.close() @@ -421,8 +423,10 @@ def extract_version(filepath, auth=None): if isinstance(version, bytes): version = version.decode() - except KeyError: - raise "Unable to parse the version from file metadata" + except KeyError as e: + raise Exception( + "Unable to parse the version from file metadata" + ).with_traceback(e.__traceback__) # Close the file reader f.close() diff --git a/icepyx/core/read.py b/icepyx/core/read.py index e11015935..8cc5afd21 100644 --- a/icepyx/core/read.py +++ b/icepyx/core/read.py @@ -1,4 +1,3 @@ -import fnmatch import glob import os import sys @@ -36,7 +35,8 @@ def _make_np_datetime(df, keyword): Example ------- - >>> ds = xr.Dataset({"time": ("time_idx", [b'2019-01-11T05:26:31.323722Z'])}, coords={"time_idx": [0]}) + >>> ds = xr.Dataset({"time": ("time_idx", [b'2019-01-11T05:26:31.323722Z'])}, + ... coords={"time_idx": [0]}) >>> _make_np_datetime(ds, "time") Dimensions: (time_idx: 1) @@ -48,7 +48,8 @@ def _make_np_datetime(df, keyword): """ if df[keyword].str.endswith("Z"): - # manually remove 'Z' from datetime to allow conversion to np.datetime64 object (support for timezones is deprecated and causes a seg fault) + # manually remove 'Z' from datetime to allow conversion to np.datetime64 object + # (support for timezones is deprecated and causes a seg fault) df.update({keyword: df[keyword].str[:-1].astype(np.datetime64)}) else: @@ -100,165 +101,51 @@ def _get_track_type_str(grp_path) -> (str, str, str): return track_str, spot_dim_name, spot_var_name -# Dev note: function fully tested (except else, which don't know how to get to) -def _check_datasource(filepath): +def _parse_source(data_source, glob_kwargs={}) -> list: """ - Determine if the input is from a local system or is an s3 bucket. 
- Then, validate the inputs (for those on the local system; s3 sources are not validated currently) - """ - - from pathlib import Path - - import fsspec - from fsspec.implementations.local import LocalFileSystem - - source_types = ["is2_local", "is2_s3"] - - if not isinstance(filepath, Path) and not isinstance(filepath, str): - raise TypeError("filepath must be a string or Path") - - fsmap = fsspec.get_mapper(str(filepath)) - output_fs = fsmap.fs - - if "s3" in output_fs.protocol: - return source_types[1] - elif isinstance(output_fs, LocalFileSystem): - assert _validate_source(filepath) - return source_types[0] - else: - raise ValueError("Could not confirm the datasource type.") + Parse the user's data_source input based on type. - """ - Could also use: os.path.splitext(f.name)[1].lower() to get file extension - - If ultimately want to handle mixed types, save the valid paths in a dict with "s3" or "local" as the keys and the list of the files as the values. - Then the dict can also contain a catalog key with a dict of catalogs for each of those types of inputs ("s3" or "local") - In general, the issue we'll run into with multiple files is going to be merging during the read in, - so it could be beneficial to not hide this too much and mandate users handle this intentionally outside the read in itself. - - this function was derived with some of the following resources, based on echopype - https://github.com/OSOceanAcoustics/echopype/blob/ab5128fb8580f135d875580f0469e5fba3193b84/echopype/utils/io.py - - https://filesystem-spec.readthedocs.io/en/latest/api.html?highlight=get_map#fsspec.spec.AbstractFileSystem.glob - - https://filesystem-spec.readthedocs.io/en/latest/_modules/fsspec/implementations/local.html - - https://github.com/OSOceanAcoustics/echopype/blob/ab5128fb8580f135d875580f0469e5fba3193b84/echopype/convert/api.py#L380 - - https://echopype.readthedocs.io/en/stable/convert.html - """ - - -# Dev note: function fully tested as currently written -def _validate_source(source): - """ - Check that the entered data source paths on the local file system are valid - - Currently, s3 data source paths are not validated. - """ - - # acceptable inputs (for now) are a single file or directory - # would ultimately like to make a Path (from pathlib import Path; isinstance(source, Path)) an option - # see https://github.com/OSOceanAcoustics/echopype/blob/ab5128fb8580f135d875580f0469e5fba3193b84/echopype/utils/io.py#L82 - assert isinstance(source, str), "You must enter your input as a string." - assert ( - os.path.isdir(source) is True or os.path.isfile(source) is True - ), "Your data source string is not a valid data source." - return True - - -# Dev Note: function is tested (at least loosely) -def _run_fast_scandir(dir, fn_glob): - """ - Quickly scan nested directories to get a list of filenames that match the fn_glob string. - Modified from https://stackoverflow.com/a/59803793/2441026 - (faster than os.walk or glob methods, and allows filename matching in subdirectories). 
- - Parameters - ---------- - dir : str - full path to the input directory - - fn_glob : str - glob-style filename pattern - - Outputs + Returns ------- - subfolders : list - list of strings of all nested subdirectories - - files : list - list of strings containing full paths to each file matching the filename pattern - """ - - subfolders, files = [], [] - - for f in os.scandir(dir): - if any(f.name.startswith(s) for s in ["__", "."]): - continue - if f.is_dir(): - subfolders.append(f.path) - if f.is_file(): - if fnmatch.fnmatch(f.name, fn_glob): - files.append(f.path) - - for dir in list(subfolders): - sf, f = _run_fast_scandir(dir, fn_glob) - subfolders.extend(sf) - files.extend(f) - - return subfolders, files - - -# Need to post on intake's page to see if this would be a useful contribution... -# https://github.com/intake/intake/blob/0.6.4/intake/source/utils.py#L216 -def _pattern_to_glob(pattern): + filelist : list of str + List of granule (filenames) to be read in """ - Adapted from intake.source.utils.path_to_glob to convert a path as pattern into a glob style path - that uses the pattern's indicated number of '?' instead of '*' where an int was specified. - Returns pattern if pattern is not a string. - - Parameters - ---------- - pattern : str - Path as pattern optionally containing format_strings + from pathlib import Path - Returns - ------- - glob_path : str - Path with int format strings replaced with the proper number of '?' and '*' otherwise. + if isinstance(data_source, list): + assert [isinstance(f, (str, Path)) for f in data_source] + # if data_source is a list pass that directly to _filelist + filelist = data_source + elif os.path.isdir(data_source): + # if data_source is a directory glob search the directory and assign to _filelist + data_source = os.path.join(data_source, "*") + filelist = glob.glob(data_source, **glob_kwargs) + elif isinstance(data_source, str) or isinstance(data_source, Path): + if data_source.startswith("s3"): + # if the string is an s3 path put it in the _filelist without globbing + filelist = [data_source] + else: + # data_source is a globable string + filelist = glob.glob(data_source, **glob_kwargs) + else: + raise TypeError( + "data_source should be a list of files, a directory, the path to a file, " + "or a glob string." + ) - Examples - -------- - >>> _pattern_to_glob('{year}/{month}/{day}.csv') - '*/*/*.csv' - >>> _pattern_to_glob('{year:4}/{month:2}/{day:2}.csv') - '????/??/??.csv' - >>> _pattern_to_glob('data/{year:4}{month:02}{day:02}.csv') - 'data/????????.csv' - >>> _pattern_to_glob('data/*.csv') - 'data/*.csv' - """ - from string import Formatter + # Remove any directories from the list (these get generated during recursive + # glob search) + filelist = [f for f in filelist if not os.path.isdir(f)] - if not isinstance(pattern, str): - return pattern + # Make sure a non-zero number of files were found + if len(filelist) == 0: + raise KeyError( + "No files found matching the specified `data_source`. Check your glob " + "string or file list." + ) - fmt = Formatter() - glob_path = "" - # prev_field_name = None - for literal_text, field_name, format_specs, _ in fmt.parse(format_string=pattern): - glob_path += literal_text - if field_name and (glob_path != "*"): - try: - glob_path += "?" 
* int(format_specs) - except ValueError: - glob_path += "*" - # alternatively, you could use bits=utils._get_parts_of_format_string(resolved_string, literal_texts, format_specs) - # and then use len(bits[i]) to get the length of each format_spec - # print(glob_path) - return glob_path + return filelist def _confirm_proceed(): @@ -282,8 +169,8 @@ class Read(EarthdataAuthMixin): Parameters ---------- - data_source : string, List - A string or list which specifies the files to be read. + data_source : string, Path, List + A string, pathlib.Path object, or list which specifies the files to be read. The string can be either: 1) the path of a single file 2) the path to a directory or @@ -291,7 +178,8 @@ class Read(EarthdataAuthMixin): The List must be a list of strings, each of which is the path of a single file. glob_kwargs : dict, default {} - Additional arguments to be passed into the [glob.glob()](https://docs.python.org/3/library/glob.html#glob.glob)function + Additional arguments to be passed into the + [glob.glob()](https://docs.python.org/3/library/glob.html#glob.glob)function out_obj_type : object, default xarray.Dataset The desired format for the data to be read in. @@ -326,7 +214,8 @@ class Read(EarthdataAuthMixin): Reading all files in a directory >>> ipx.Read('/path/to/data/') # doctest: +SKIP - Reading files that match a particular pattern (here, all .h5 files that start with `processed_ATL06_`). + Reading files that match a particular pattern + (here, all .h5 files that start with `processed_ATL06_`). >>> ipx.Read('/path/to/data/processed_ATL06_*.h5') # doctest: +SKIP Reading a specific list of files @@ -370,29 +259,7 @@ def __init__( "Please use the `data_source` argument to specify your dataset instead." ) - if isinstance(data_source, list): - # if data_source is a list pass that directly to _filelist - self._filelist = data_source - elif os.path.isdir(data_source): - # if data_source is a directory glob search the directory and assign to _filelist - data_source = os.path.join(data_source, "*") - self._filelist = glob.glob(data_source, **glob_kwargs) - elif isinstance(data_source, str): - if data_source.startswith("s3"): - # if the string is an s3 path put it in the _filelist without globbing - self._filelist = [data_source] - else: - # data_source is a globable string - self._filelist = glob.glob(data_source, **glob_kwargs) - else: - raise TypeError( - "data_source should be a list of files, a directory, the path to a file, " - "or a glob string." - ) - - # Remove any directories from the list (these get generated during recursive - # glob search) - self._filelist = [f for f in self._filelist if not os.path.isdir(f)] + self._filelist = _parse_source(data_source, glob_kwargs) # Create a dictionary of the products as read from the metadata product_dict = {} @@ -423,7 +290,7 @@ def __init__( ) _confirm_proceed() - # Raise warnings or errors for multiple products or products not matching the user-specified product + # Raise error if multiple products given all_products = list(set(product_dict.values())) if len(all_products) > 1: raise TypeError( @@ -431,14 +298,9 @@ def __init__( "Please provide a valid `data_source` parameter indicating files of a single " "product" ) - elif len(all_products) == 0: - raise TypeError( - "No files found matching the specified `data_source`. Check your glob " - "string or file list." 
- ) - else: - # Assign the identified product to the property - self._product = all_products[0] + + # Assign the identified product to the property + self._product = all_products[0] if out_obj_type is not None: print( @@ -454,7 +316,8 @@ def __init__( def vars(self): """ Return the variables object associated with the data being read in. - This instance is generated from the source file or first file in a list of input files (when source is a directory). + This instance is generated from the source file or first file in a list of input files + (when source is a directory). See Also -------- @@ -507,7 +370,8 @@ def _add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): the second list contains the second portion of the group name, etc. "none" is used to fill in where paths are shorter than the longest path. wanted_dict : dict - Dictionary with variable names as keys and a list of group + variable paths containing those variables as values. + Dictionary with variable names as keys and a list of group + + variable paths containing those variables as values. Returns ------- @@ -600,7 +464,8 @@ def _add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): ) ) - # for the subgoups where there is 1d delta time data, make sure that the cycle number is still a coordinate for merging + # for the subgoups where there is 1d delta time data, + # make sure that the cycle number is still a coordinate for merging try: ds = ds.assign_coords( { @@ -643,14 +508,16 @@ def _combine_nested_vars(is2ds, ds, grp_path, wanted_dict): grp_path : str hdf5 group path read into ds wanted_dict : dict - Dictionary with variable names as keys and a list of group + variable paths containing those variables as values. + Dictionary with variable names as keys and a list of group + + variable paths containing those variables as values. Returns ------- Xarray Dataset with variables from the ds variable group added. """ - # Dev Goal: improve this type of iterating to minimize amount of looping required. Would a path handling library be useful here? + # Dev Goal: improve this type of iterating to minimize amount of looping required. + # Would a path handling library be useful here? grp_spec_vars = [ k for k, v in wanted_dict.items() if any(f"{grp_path}/{k}" in x for x in v) ] @@ -682,7 +549,8 @@ def _combine_nested_vars(is2ds, ds, grp_path, wanted_dict): def load(self): """ - Create a single Xarray Dataset containing the data from one or more files and/or ground tracks. + Create a single Xarray Dataset containing the data from one or more + files and/or ground tracks. Uses icepyx's ICESat-2 data product awareness and Xarray's `combine_by_coords` function. All items in the wanted variables list will be loaded from the files into memory. @@ -778,8 +646,6 @@ def _build_dataset_template(self, file): It may be possible to expand this function to provide multiple templates. """ - # NOTE: use the hdf5 library to grab the attr for the product specifier - # can ultimately then use it to check against user specified one or merge strategies (or to return a list of ds) is2ds = xr.Dataset( coords=dict( @@ -798,7 +664,8 @@ def _read_single_grp(self, file, grp_path): ---------- file : str Full path to ICESat-2 data file. - Currently tested for locally downloaded files; untested but hopefully works for s3 stored cloud files. + Currently tested for locally downloaded files; + untested but hopefully works for s3 stored cloud files. grp_path : str Full string to a variable group. E.g. 
'gt1l/land_ice_segments' @@ -818,23 +685,27 @@ def _read_single_grp(self, file, grp_path): def _build_single_file_dataset(self, file, groups_list): """ - Create a single xarray dataset with all of the wanted variables/groups from the wanted var list for a single data file/url. + Create a single xarray dataset with all of the wanted variables/groups + from the wanted var list for a single data file/url. Parameters ---------- file : str Full path to ICESat-2 data file. - Currently tested for locally downloaded files; untested but hopefully works for s3 stored cloud files. + Currently tested for locally downloaded files; + untested but hopefully works for s3 stored cloud files. groups_list : list of strings List of full paths to data variables within the file. - e.g. ['orbit_info/sc_orient', 'gt1l/land_ice_segments/h_li', 'gt1l/land_ice_segments/latitude', 'gt1l/land_ice_segments/longitude'] + e.g. ['orbit_info/sc_orient', 'gt1l/land_ice_segments/h_li', + 'gt1l/land_ice_segments/latitude', 'gt1l/land_ice_segments/longitude'] Returns ------- Xarray Dataset """ - # DEVNOTE: if and elif does not actually apply wanted variable list, and has not been tested for merging multiple files into one ds + # DEVNOTE: if and elif does not actually apply wanted variable list, + # and has not been tested for merging multiple files into one ds # if a gridded product # TODO: all products need to be tested, and quicklook products added or explicitly excluded # Level 3b, gridded (netcdf): ATL14, 15, 16, 17, 18, 19, 20, 21 @@ -861,13 +732,14 @@ def _build_single_file_dataset(self, file, groups_list): ) wanted_groups_set = set(wanted_groups) - # orbit_info is used automatically as the first group path so the info is available for the rest of the groups + # orbit_info is used automatically as the first group path + # so the info is available for the rest of the groups # wanted_groups_set.remove("orbit_info") wanted_groups_set.remove("ancillary_data") # Note: the sorting is critical for datasets with highly nested groups wanted_groups_list = ["ancillary_data"] + sorted(wanted_groups_set) - # returns the wanted groups as a list of lists with group path string elements separated + # returns wanted groups as a list of lists with group path string elements separated _, wanted_groups_tiered = Variables.parse_var_list( groups_list, tiered=True, tiered_vars=True ) @@ -892,14 +764,15 @@ def _build_single_file_dataset(self, file, groups_list): groups_list, tiered=False ) wanted_groups_set = set(wanted_groups) - # orbit_info is used automatically as the first group path so the info is available for the rest of the groups + # orbit_info is used automatically as the first group path + # so the info is available for the rest of the groups wanted_groups_set.remove("orbit_info") wanted_groups_set.remove("ancillary_data") # Note: the sorting is critical for datasets with highly nested groups wanted_groups_list = ["orbit_info", "ancillary_data"] + sorted( wanted_groups_set ) - # returns the wanted groups as a list of lists with group path string elements separated + # returns wanted groups as a list of lists with group path string elements separated _, wanted_groups_tiered = Variables.parse_var_list( groups_list, tiered=True, tiered_vars=True ) @@ -912,7 +785,8 @@ def _build_single_file_dataset(self, file, groups_list): is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict ) - # if there are any deeper nested variables, get those so they have actual coordinates and add them + # if there are any deeper nested variables, + # get those 
so they have actual coordinates and add them # this may apply to (at a minimum): ATL08 if any(grp_path in grp_path2 for grp_path2 in wanted_groups_list): for grp_path2 in wanted_groups_list: diff --git a/icepyx/tests/test_read.py b/icepyx/tests/test_read.py index 67b29b598..20807c410 100644 --- a/icepyx/tests/test_read.py +++ b/icepyx/tests/test_read.py @@ -1,97 +1,85 @@ import pytest -from icepyx.core.read import Read import icepyx.core.read as read -def test_check_datasource_type(): - ermesg = "filepath must be a string or Path" +# note isdir will issue a TypeError if a tuple is passed +def test_parse_source_bad_input_type(): + ermesg = ( + "data_source should be a list of files, a directory, the path to a file, " + "or a glob string." + ) with pytest.raises(TypeError, match=ermesg): - read._check_datasource(246) - - -@pytest.mark.parametrize( - "filepath, expect", - [ - ("./", "is2_local"), - ( - """s3://nsidc-cumulus-prod-protected/ATLAS/ - ATL03/006/2019/11/30/ATL03_20191130221008_09930503_006_01.h5""", - "is2_s3", - ), - ], -) -def test_check_datasource(filepath, expect): - source_type = read._check_datasource(filepath) - assert source_type == expect - - -# not sure what I could enter here would get to the else... -# def test_unknown_datasource_type(): -# ermesg = "Could not confirm the datasource type." -# with pytest.raises(ValueError, match=ermesg): -# read._check_datasource("") - + read._parse_source(150) + read._parse_source({"myfiles": "./my_valid_path/file.h5"}) -def test_validate_source_str_given_as_list(): - ermesg = "You must enter your input as a string." - with pytest.raises(AssertionError, match=ermesg): - read._validate_source(["/path/to/valid/ATL06_file.py"]) - -def test_validate_source_str_not_a_dir_or_file(): - ermesg = "Your data source string is not a valid data source." - with pytest.raises(AssertionError, match=ermesg): - read._validate_source("./fake/dirpath") - read._validate_source("./fake_file.h5") +def test_parse_source_no_files(): + ermesg = ( + "No files found matching the specified `data_source`. Check your glob " + "string or file list." 
+ ) + with pytest.raises(KeyError, match=ermesg): + read._parse_source("./icepyx/bogus_glob") @pytest.mark.parametrize( - "dir, fn_glob, expect", + "source, expect", [ - ( - "./icepyx/", - "is2*.py", - ( - sorted( - [ - "./icepyx/core", - "./icepyx/quest", - "./icepyx/quest/dataset_scripts", - "./icepyx/tests", - ] - ), - sorted( - [ - "./icepyx/core/is2ref.py", - "./icepyx/tests/is2class_query.py", - ] - ), + ( # check list input + [ + "./icepyx/core/is2ref.py", + "./icepyx/tests/is2class_query.py", + ], + sorted( + [ + "./icepyx/core/is2ref.py", + "./icepyx/tests/is2class_query.py", + ] ), ), - ( - "./icepyx/core", - "is2*.py", - ([], ["./icepyx/core/is2ref.py"]), + ( # check dir input + "./examples", + [ + "./examples/README.md", + ], ), - ( - "./icepyx", - "bogus_glob", - ( + ( # check filename string with glob pattern input + "./icepyx/**/is2*.py", + sorted( [ - "./icepyx/core", - "./icepyx/quest", - "./icepyx/quest/dataset_scripts", - "./icepyx/tests", - ], - [], + "./icepyx/core/is2ref.py", + "./icepyx/tests/is2class_query.py", + ] + ), + ), + ( # check filename string without glob pattern input + "./icepyx/core/is2ref.py", + [ + "./icepyx/core/is2ref.py", + ], + ), + ( # check s3 filename string + ( + "s3://nsidc-cumulus-prod-protected/ATLAS/" + "ATL03/006/2019/11/30/ATL03_20191130221008_09930503_006_01.h5" ), + [ + ( + "s3://nsidc-cumulus-prod-protected/ATLAS/" + "ATL03/006/2019/11/30/ATL03_20191130221008_09930503_006_01.h5" + ), + ], + ), + ( + "./icepyx/core/is2*.py", + ["./icepyx/core/is2ref.py"], ), ], ) -def test_check_run_fast_scandir(dir, fn_glob, expect): - (subfolders, files) = read._run_fast_scandir(dir, fn_glob) - assert (sorted(subfolders), sorted(files)) == expect +def test_parse_source(source, expect): + filelist = read._parse_source(source, glob_kwargs={"recursive": True}) + assert (sorted(filelist)) == expect @pytest.mark.parametrize( @@ -114,18 +102,3 @@ def test_get_track_type_str( exp_spot_dim_name, exp_spot_var_name, ) - - -# Best way to test this may be by including a small sample file with the repo -# (which can be used for testing some of the catalog/read-in functions as well) -# def test_invalid_filename_pattern_in_file(): -# ermesg = "Your input filename does not match the specified pattern." -# default_pattern = Read("/path/to/valid/source/file")._filename_pattern -# with pytest.raises(AssertionError, match=ermesg): -# read._validate_source('/valid/filepath/with/non-default/filename/pattern.h5', default_pattern) - -# def test_invalid_filename_pattern_in_dir(): -# ermesg = "None of your filenames match the specified pattern." 
-# default_pattern = Read("/path/to/valid/dir/")._filename_pattern -# with pytest.raises(AssertionError, match=ermesg): -# read._validate_source('/valid/dirpath/with/non-default/filename/pattern.h5', default_pattern) diff --git a/icepyx/tests/test_validate_inputs.py b/icepyx/tests/test_validate_inputs.py index 0b5f2f2eb..4d0ea0bd5 100644 --- a/icepyx/tests/test_validate_inputs.py +++ b/icepyx/tests/test_validate_inputs.py @@ -1,7 +1,4 @@ import pytest -import warnings -import datetime as dt -import numpy as np import icepyx.core.validate_inputs as val @@ -70,3 +67,35 @@ def test_tracks_valid(): val.tracks(1388) # check that warning message matches expected assert record[0].message.args[0] == expmsg + + +@pytest.mark.parametrize( + "filepath, expect", + [ + ("./", "./"), + ( + """s3://nsidc-cumulus-prod-protected/ATLAS/ + ATL03/006/2019/11/30/ATL03_20191130221008_09930503_006_01.h5""", + """s3://nsidc-cumulus-prod-protected/ATLAS/ + ATL03/006/2019/11/30/ATL03_20191130221008_09930503_006_01.h5""", + ), + ], +) +def test_check_s3bucket(filepath, expect): + verified_path = val.check_s3bucket(filepath) + assert verified_path == expect + + +def test_wrong_s3bucket(): + filepath = """s3://notnsidc-cumulus-prod-protected/ATLAS/ + ATL03/006/2019/11/30/ATL03_20191130221008_09930503_006_01.h5""" + + expmsg = ( + "s3 data being read from outside the NSIDC data bucket. Icepyx can " + "read this data, but available data lists may not be accurate." + ) + + with pytest.warns(UserWarning) as record: + val.check_s3bucket(filepath) + + assert record[0].message.args[0] == expmsg
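Note (not part of the patch): a minimal usage sketch of the new read._parse_source helper introduced above, mirroring the cases exercised in tests/test_read.py::test_parse_source. The /path/to/data paths and ATL06 filenames are hypothetical placeholders, and the behavior comments assume those paths exist locally; the s3 URI is the one used in the tests.

    # Reviewer sketch, not part of the patch: exercises read._parse_source with
    # placeholder paths ("/path/to/data", processed_ATL06_*.h5 are hypothetical).
    import icepyx.core.read as read

    # A list of file paths (str or pathlib.Path) is passed through unchanged.
    files = read._parse_source(
        [
            "/path/to/data/processed_ATL06_file1.h5",
            "/path/to/data/processed_ATL06_file2.h5",
        ]
    )

    # A directory is globbed for its immediate contents (a trailing "*" is appended).
    files = read._parse_source("/path/to/data/")

    # A glob string is expanded; glob_kwargs are forwarded to glob.glob, so "**"
    # patterns need {"recursive": True}.
    files = read._parse_source(
        "/path/to/data/**/processed_ATL06_*.h5", glob_kwargs={"recursive": True}
    )

    # An s3 URI is returned as a one-element list without globbing.
    files = read._parse_source(
        "s3://nsidc-cumulus-prod-protected/ATLAS/"
        "ATL03/006/2019/11/30/ATL03_20191130221008_09930503_006_01.h5"
    )

    # Any other input type raises TypeError; directories returned by the glob are
    # filtered out of the result, and an empty match raises KeyError.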