From 07afb1d57bb93d72c623f7a48d9e84956bf2f7dc Mon Sep 17 00:00:00 2001
From: Jessica Scheick <jbscheick@gmail.com>
Date: Fri, 26 Jan 2024 15:29:28 -0500
Subject: [PATCH 01/22] convert _validate_source() to _parse_source() and
 update tests

---
 icepyx/core/read.py       | 106 ++++++++++++++------------------------
 icepyx/tests/test_read.py |  46 ++++++-----------
 2 files changed, 53 insertions(+), 99 deletions(-)

diff --git a/icepyx/core/read.py b/icepyx/core/read.py
index e11015935..e4289de56 100644
--- a/icepyx/core/read.py
+++ b/icepyx/core/read.py
@@ -149,9 +149,44 @@ def _check_datasource(filepath):
     """
 
 
-# Dev note: function fully tested as currently written
-def _validate_source(source):
+def _parse_source(data_source, glob_kwargs) -> list:
     """
+    Parse the data_source input based on type.
+
+    Returns
+    -------
+    filelist : list of str
+        List of granule (filenames) to be read in
+    """
+
+    if isinstance(data_source, list):
+        # if data_source is a list pass that directly to _filelist
+        filelist = data_source
+    elif os.path.isdir(data_source):
+        # if data_source is a directory glob search the directory and assign to _filelist
+        data_source = os.path.join(data_source, "*")
+        filelist = glob.glob(data_source, **glob_kwargs)
+    elif isinstance(data_source, str):
+        if data_source.startswith("s3"):
+            # if the string is an s3 path put it in the _filelist without globbing
+            filelist = [data_source]
+        else:
+            # data_source is a globable string
+            filelist = glob.glob(data_source, **glob_kwargs)
+    else:
+        raise TypeError(
+            "data_source should be a list of files, a directory, the path to a file, "
+            "or a glob string."
+        )
+
+    # Remove any directories from the list (these get generated during recursive
+    # glob search)
+    filelist = [f for f in filelist if not os.path.isdir(f)]
+
+    return filelist
+
+    """
+    
     Check that the entered data source paths on the local file system are valid
 
     Currently, s3 data source paths are not validated.
@@ -167,49 +202,6 @@ def _validate_source(source):
     return True
 
 
-# Dev Note: function is tested (at least loosely)
-def _run_fast_scandir(dir, fn_glob):
-    """
-    Quickly scan nested directories to get a list of filenames that match the fn_glob string.
-    Modified from https://stackoverflow.com/a/59803793/2441026
-    (faster than os.walk or glob methods, and allows filename matching in subdirectories).
-
-    Parameters
-    ----------
-    dir : str
-        full path to the input directory
-
-    fn_glob : str
-        glob-style filename pattern
-
-    Outputs
-    -------
-    subfolders : list
-        list of strings of all nested subdirectories
-
-    files : list
-        list of strings containing full paths to each file matching the filename pattern
-    """
-
-    subfolders, files = [], []
-
-    for f in os.scandir(dir):
-        if any(f.name.startswith(s) for s in ["__", "."]):
-            continue
-        if f.is_dir():
-            subfolders.append(f.path)
-        if f.is_file():
-            if fnmatch.fnmatch(f.name, fn_glob):
-                files.append(f.path)
-
-    for dir in list(subfolders):
-        sf, f = _run_fast_scandir(dir, fn_glob)
-        subfolders.extend(sf)
-        files.extend(f)
-
-    return subfolders, files
-
-
 # Need to post on intake's page to see if this would be a useful contribution...
 # https://github.com/intake/intake/blob/0.6.4/intake/source/utils.py#L216
 def _pattern_to_glob(pattern):
@@ -370,29 +362,7 @@ def __init__(
                 "Please use the `data_source` argument to specify your dataset instead."
             )
 
-        if isinstance(data_source, list):
-            # if data_source is a list pass that directly to _filelist
-            self._filelist = data_source
-        elif os.path.isdir(data_source):
-            # if data_source is a directory glob search the directory and assign to _filelist
-            data_source = os.path.join(data_source, "*")
-            self._filelist = glob.glob(data_source, **glob_kwargs)
-        elif isinstance(data_source, str):
-            if data_source.startswith("s3"):
-                # if the string is an s3 path put it in the _filelist without globbing
-                self._filelist = [data_source]
-            else:
-                # data_source is a globable string
-                self._filelist = glob.glob(data_source, **glob_kwargs)
-        else:
-            raise TypeError(
-                "data_source should be a list of files, a directory, the path to a file, "
-                "or a glob string."
-            )
-
-        # Remove any directories from the list (these get generated during recursive
-        # glob search)
-        self._filelist = [f for f in self._filelist if not os.path.isdir(f)]
+        self._filelist = _parse_source(data_source, glob_kwargs)
 
         # Create a dictionary of the products as read from the metadata
         product_dict = {}
diff --git a/icepyx/tests/test_read.py b/icepyx/tests/test_read.py
index 67b29b598..147472593 100644
--- a/icepyx/tests/test_read.py
+++ b/icepyx/tests/test_read.py
@@ -50,48 +50,32 @@ def test_validate_source_str_not_a_dir_or_file():
     "dir, fn_glob, expect",
     [
         (
-            "./icepyx/",
+            "./icepyx/**/",
             "is2*.py",
-            (
-                sorted(
-                    [
-                        "./icepyx/core",
-                        "./icepyx/quest",
-                        "./icepyx/quest/dataset_scripts",
-                        "./icepyx/tests",
-                    ]
-                ),
-                sorted(
-                    [
-                        "./icepyx/core/is2ref.py",
-                        "./icepyx/tests/is2class_query.py",
-                    ]
-                ),
+            sorted(
+                [
+                    "./icepyx/core/is2ref.py",
+                    "./icepyx/tests/is2class_query.py",
+                ]
             ),
         ),
         (
-            "./icepyx/core",
+            "./icepyx/core/",
             "is2*.py",
-            ([], ["./icepyx/core/is2ref.py"]),
+            ["./icepyx/core/is2ref.py"],
         ),
         (
-            "./icepyx",
+            "./icepyx/",
             "bogus_glob",
-            (
-                [
-                    "./icepyx/core",
-                    "./icepyx/quest",
-                    "./icepyx/quest/dataset_scripts",
-                    "./icepyx/tests",
-                ],
-                [],
-            ),
+            [],
         ),
     ],
 )
-def test_check_run_fast_scandir(dir, fn_glob, expect):
-    (subfolders, files) = read._run_fast_scandir(dir, fn_glob)
-    assert (sorted(subfolders), sorted(files)) == expect
+def test_parse_source(dir, fn_glob, expect):
+    filelist = read._parse_source(dir + fn_glob, glob_kwargs={"recursive": True})
+    print(filelist)
+    print(expect)
+    assert (sorted(filelist)) == expect
 
 
 @pytest.mark.parametrize(

From 615f7a244d902897762bb9843320c5fe2a90cc27 Mon Sep 17 00:00:00 2001
From: Jessica Scheick <jbscheick@gmail.com>
Date: Fri, 26 Jan 2024 15:42:42 -0500
Subject: [PATCH 02/22] add Path as accepted data_source type to read

---
 icepyx/core/read.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/icepyx/core/read.py b/icepyx/core/read.py
index e4289de56..70642234e 100644
--- a/icepyx/core/read.py
+++ b/icepyx/core/read.py
@@ -159,6 +159,8 @@ def _parse_source(data_source, glob_kwargs) -> list:
         List of granule (filenames) to be read in
     """
 
+    from pathlib import Path
+
     if isinstance(data_source, list):
         # if data_source is a list pass that directly to _filelist
         filelist = data_source
@@ -166,7 +168,7 @@ def _parse_source(data_source, glob_kwargs) -> list:
         # if data_source is a directory glob search the directory and assign to _filelist
         data_source = os.path.join(data_source, "*")
         filelist = glob.glob(data_source, **glob_kwargs)
-    elif isinstance(data_source, str):
+    elif isinstance(data_source, str) or isinstance(data_source, Path):
         if data_source.startswith("s3"):
             # if the string is an s3 path put it in the _filelist without globbing
             filelist = [data_source]
@@ -274,8 +276,8 @@ class Read(EarthdataAuthMixin):
 
     Parameters
     ----------
-    data_source : string, List
-        A string or list which specifies the files to be read.
+    data_source : string, Path, List
+        A string, pathlib.Path object, or list which specifies the files to be read.
         The string can be either:
         1) the path of a single file
         2) the path to a directory or

From 9ef66a9b9f507118695796d49ac399755c43a781 Mon Sep 17 00:00:00 2001
From: Jessica Scheick <jbscheick@gmail.com>
Date: Fri, 26 Jan 2024 15:45:15 -0500
Subject: [PATCH 03/22] remove _check_datasource function (for deprecated
 keyword inputs)

---
 icepyx/core/read.py       | 65 ---------------------------------------
 icepyx/tests/test_read.py | 42 +++++++++----------------
 2 files changed, 15 insertions(+), 92 deletions(-)

diff --git a/icepyx/core/read.py b/icepyx/core/read.py
index 70642234e..949e28de0 100644
--- a/icepyx/core/read.py
+++ b/icepyx/core/read.py
@@ -100,55 +100,6 @@ def _get_track_type_str(grp_path) -> (str, str, str):
     return track_str, spot_dim_name, spot_var_name
 
 
-# Dev note: function fully tested (except else, which don't know how to get to)
-def _check_datasource(filepath):
-    """
-    Determine if the input is from a local system or is an s3 bucket.
-    Then, validate the inputs (for those on the local system; s3 sources are not validated currently)
-    """
-
-    from pathlib import Path
-
-    import fsspec
-    from fsspec.implementations.local import LocalFileSystem
-
-    source_types = ["is2_local", "is2_s3"]
-
-    if not isinstance(filepath, Path) and not isinstance(filepath, str):
-        raise TypeError("filepath must be a string or Path")
-
-    fsmap = fsspec.get_mapper(str(filepath))
-    output_fs = fsmap.fs
-
-    if "s3" in output_fs.protocol:
-        return source_types[1]
-    elif isinstance(output_fs, LocalFileSystem):
-        assert _validate_source(filepath)
-        return source_types[0]
-    else:
-        raise ValueError("Could not confirm the datasource type.")
-
-    """
-    Could also use: os.path.splitext(f.name)[1].lower() to get file extension
-
-    If ultimately want to handle mixed types, save the valid paths in a dict with "s3" or "local" as the keys and the list of the files as the values.
-    Then the dict can also contain a catalog key with a dict of catalogs for each of those types of inputs ("s3" or "local")
-    In general, the issue we'll run into with multiple files is going to be merging during the read in,
-    so it could be beneficial to not hide this too much and mandate users handle this intentionally outside the read in itself.
-
-    this function was derived with some of the following resources, based on echopype
-    https://github.com/OSOceanAcoustics/echopype/blob/ab5128fb8580f135d875580f0469e5fba3193b84/echopype/utils/io.py
-
-    https://filesystem-spec.readthedocs.io/en/latest/api.html?highlight=get_map#fsspec.spec.AbstractFileSystem.glob
-
-    https://filesystem-spec.readthedocs.io/en/latest/_modules/fsspec/implementations/local.html
-
-    https://github.com/OSOceanAcoustics/echopype/blob/ab5128fb8580f135d875580f0469e5fba3193b84/echopype/convert/api.py#L380
-
-    https://echopype.readthedocs.io/en/stable/convert.html
-    """
-
-
 def _parse_source(data_source, glob_kwargs) -> list:
     """
     Parse the data_source input based on type.
@@ -187,22 +138,6 @@ def _parse_source(data_source, glob_kwargs) -> list:
 
     return filelist
 
-    """
-    
-    Check that the entered data source paths on the local file system are valid
-
-    Currently, s3 data source paths are not validated.
-    """
-
-    # acceptable inputs (for now) are a single file or directory
-    # would ultimately like to make a Path (from pathlib import Path; isinstance(source, Path)) an option
-    # see https://github.com/OSOceanAcoustics/echopype/blob/ab5128fb8580f135d875580f0469e5fba3193b84/echopype/utils/io.py#L82
-    assert isinstance(source, str), "You must enter your input as a string."
-    assert (
-        os.path.isdir(source) is True or os.path.isfile(source) is True
-    ), "Your data source string is not a valid data source."
-    return True
-
 
 # Need to post on intake's page to see if this would be a useful contribution...
 # https://github.com/intake/intake/blob/0.6.4/intake/source/utils.py#L216
diff --git a/icepyx/tests/test_read.py b/icepyx/tests/test_read.py
index 147472593..2047357f2 100644
--- a/icepyx/tests/test_read.py
+++ b/icepyx/tests/test_read.py
@@ -4,33 +4,21 @@
 import icepyx.core.read as read
 
 
-def test_check_datasource_type():
-    ermesg = "filepath must be a string or Path"
-    with pytest.raises(TypeError, match=ermesg):
-        read._check_datasource(246)
-
-
-@pytest.mark.parametrize(
-    "filepath, expect",
-    [
-        ("./", "is2_local"),
-        (
-            """s3://nsidc-cumulus-prod-protected/ATLAS/
-            ATL03/006/2019/11/30/ATL03_20191130221008_09930503_006_01.h5""",
-            "is2_s3",
-        ),
-    ],
-)
-def test_check_datasource(filepath, expect):
-    source_type = read._check_datasource(filepath)
-    assert source_type == expect
-
-
-# not sure what I could enter here would get to the else...
-# def test_unknown_datasource_type():
-#     ermesg = "Could not confirm the datasource type."
-#     with pytest.raises(ValueError, match=ermesg):
-#         read._check_datasource("")
+# use as basis for test s3 function
+# @pytest.mark.parametrize(
+#     "filepath, expect",
+#     [
+#         ("./", "is2_local"),
+#         (
+#             """s3://nsidc-cumulus-prod-protected/ATLAS/
+#             ATL03/006/2019/11/30/ATL03_20191130221008_09930503_006_01.h5""",
+#             "is2_s3",
+#         ),
+#     ],
+# )
+# def test_check_datasource(filepath, expect):
+#     source_type = read._check_datasource(filepath)
+#     assert source_type == expect
 
 
 def test_validate_source_str_given_as_list():

From a031a7a201575dc5df033dde72db6eb27d8b751e Mon Sep 17 00:00:00 2001
From: Jessica Scheick <jbscheick@gmail.com>
Date: Fri, 26 Jan 2024 16:57:45 -0500
Subject: [PATCH 04/22] add check_s3_bucket function tests

---
 icepyx/tests/test_read.py            | 17 ---------------
 icepyx/tests/test_validate_inputs.py | 32 ++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/icepyx/tests/test_read.py b/icepyx/tests/test_read.py
index 2047357f2..8ff200632 100644
--- a/icepyx/tests/test_read.py
+++ b/icepyx/tests/test_read.py
@@ -4,23 +4,6 @@
 import icepyx.core.read as read
 
 
-# use as basis for test s3 function
-# @pytest.mark.parametrize(
-#     "filepath, expect",
-#     [
-#         ("./", "is2_local"),
-#         (
-#             """s3://nsidc-cumulus-prod-protected/ATLAS/
-#             ATL03/006/2019/11/30/ATL03_20191130221008_09930503_006_01.h5""",
-#             "is2_s3",
-#         ),
-#     ],
-# )
-# def test_check_datasource(filepath, expect):
-#     source_type = read._check_datasource(filepath)
-#     assert source_type == expect
-
-
 def test_validate_source_str_given_as_list():
     ermesg = "You must enter your input as a string."
     with pytest.raises(AssertionError, match=ermesg):
diff --git a/icepyx/tests/test_validate_inputs.py b/icepyx/tests/test_validate_inputs.py
index 0b5f2f2eb..b776a7fc8 100644
--- a/icepyx/tests/test_validate_inputs.py
+++ b/icepyx/tests/test_validate_inputs.py
@@ -70,3 +70,35 @@ def test_tracks_valid():
         val.tracks(1388)
     # check that warning message matches expected
     assert record[0].message.args[0] == expmsg
+
+
+@pytest.mark.parametrize(
+    "filepath, expect",
+    [
+        ("./", "./"),
+        (
+            """s3://nsidc-cumulus-prod-protected/ATLAS/
+            ATL03/006/2019/11/30/ATL03_20191130221008_09930503_006_01.h5""",
+            """s3://nsidc-cumulus-prod-protected/ATLAS/
+            ATL03/006/2019/11/30/ATL03_20191130221008_09930503_006_01.h5""",
+        ),
+    ],
+)
+def test_check_s3bucket(filepath, expect):
+    verified_path = val.check_s3bucket(filepath)
+    assert verified_path == expect
+
+
+def test_wrong_s3bucket():
+    filepath = """s3://notnsidc-cumulus-prod-protected/ATLAS/
+            ATL03/006/2019/11/30/ATL03_20191130221008_09930503_006_01.h5"""
+
+    expmsg = (
+        "s3 data being read from outside the NSIDC data bucket. Icepyx can "
+        "read this data, but available data lists may not be accurate."
+    )
+
+    with pytest.warns(UserWarning) as record:
+        val.check_s3bucket(filepath)
+
+    assert record[0].message.args[0] == expmsg

From 465895e7ff77f8aded790fffba16f058e9dc8e18 Mon Sep 17 00:00:00 2001
From: Jessica Scheick <jbscheick@gmail.com>
Date: Mon, 29 Jan 2024 16:22:32 -0500
Subject: [PATCH 05/22] remove pattern_to_glob function

---
 icepyx/core/read.py       | 51 ---------------------------------------
 icepyx/tests/test_read.py |  2 --
 2 files changed, 53 deletions(-)

diff --git a/icepyx/core/read.py b/icepyx/core/read.py
index 949e28de0..7582dbd0a 100644
--- a/icepyx/core/read.py
+++ b/icepyx/core/read.py
@@ -139,57 +139,6 @@ def _parse_source(data_source, glob_kwargs) -> list:
     return filelist
 
 
-# Need to post on intake's page to see if this would be a useful contribution...
-# https://github.com/intake/intake/blob/0.6.4/intake/source/utils.py#L216
-def _pattern_to_glob(pattern):
-    """
-    Adapted from intake.source.utils.path_to_glob to convert a path as pattern into a glob style path
-    that uses the pattern's indicated number of '?' instead of '*' where an int was specified.
-
-    Returns pattern if pattern is not a string.
-
-    Parameters
-    ----------
-    pattern : str
-        Path as pattern optionally containing format_strings
-
-    Returns
-    -------
-    glob_path : str
-        Path with int format strings replaced with the proper number of '?' and '*' otherwise.
-
-    Examples
-    --------
-    >>> _pattern_to_glob('{year}/{month}/{day}.csv')
-    '*/*/*.csv'
-    >>> _pattern_to_glob('{year:4}/{month:2}/{day:2}.csv')
-    '????/??/??.csv'
-    >>> _pattern_to_glob('data/{year:4}{month:02}{day:02}.csv')
-    'data/????????.csv'
-    >>> _pattern_to_glob('data/*.csv')
-    'data/*.csv'
-    """
-    from string import Formatter
-
-    if not isinstance(pattern, str):
-        return pattern
-
-    fmt = Formatter()
-    glob_path = ""
-    # prev_field_name = None
-    for literal_text, field_name, format_specs, _ in fmt.parse(format_string=pattern):
-        glob_path += literal_text
-        if field_name and (glob_path != "*"):
-            try:
-                glob_path += "?" * int(format_specs)
-            except ValueError:
-                glob_path += "*"
-                # alternatively, you could use bits=utils._get_parts_of_format_string(resolved_string, literal_texts, format_specs)
-                # and then use len(bits[i]) to get the length of each format_spec
-    # print(glob_path)
-    return glob_path
-
-
 def _confirm_proceed():
     """
     Ask the user if they wish to proceed with processing. If 'y', or 'yes', then continue. Any
diff --git a/icepyx/tests/test_read.py b/icepyx/tests/test_read.py
index 8ff200632..439b2950c 100644
--- a/icepyx/tests/test_read.py
+++ b/icepyx/tests/test_read.py
@@ -44,8 +44,6 @@ def test_validate_source_str_not_a_dir_or_file():
 )
 def test_parse_source(dir, fn_glob, expect):
     filelist = read._parse_source(dir + fn_glob, glob_kwargs={"recursive": True})
-    print(filelist)
-    print(expect)
     assert (sorted(filelist)) == expect
 
 

From 0635d0c59cbc7a9ab2f77d8950c591e74b67f159 Mon Sep 17 00:00:00 2001
From: Jessica Scheick <jbscheick@gmail.com>
Date: Mon, 29 Jan 2024 17:20:27 -0500
Subject: [PATCH 06/22] add more tests for all cases in _parse_source

---
 icepyx/tests/test_read.py | 55 +++++++++++++++++++++++++++++++--------
 1 file changed, 44 insertions(+), 11 deletions(-)

diff --git a/icepyx/tests/test_read.py b/icepyx/tests/test_read.py
index 439b2950c..fe8d6406f 100644
--- a/icepyx/tests/test_read.py
+++ b/icepyx/tests/test_read.py
@@ -18,11 +18,13 @@ def test_validate_source_str_not_a_dir_or_file():
 
 
 @pytest.mark.parametrize(
-    "dir, fn_glob, expect",
+    "source, expect",
     [
-        (
-            "./icepyx/**/",
-            "is2*.py",
+        (  # check list input
+            [
+                "./icepyx/core/is2ref.py",
+                "./icepyx/tests/is2class_query.py",
+            ],
             sorted(
                 [
                     "./icepyx/core/is2ref.py",
@@ -30,20 +32,51 @@ def test_validate_source_str_not_a_dir_or_file():
                 ]
             ),
         ),
+        (  # check dir input
+            "./examples",
+            [
+                "./examples/README.md",
+            ],
+        ),
+        (  # check filename string with glob pattern input
+            "./icepyx/**/is2*.py",
+            sorted(
+                [
+                    "./icepyx/core/is2ref.py",
+                    "./icepyx/tests/is2class_query.py",
+                ]
+            ),
+        ),
+        (  # check filename string without glob pattern input
+            "./icepyx/core/is2ref.py",
+            [
+                "./icepyx/core/is2ref.py",
+            ],
+        ),
+        (  # check s3 filename string
+            (
+                "s3://nsidc-cumulus-prod-protected/ATLAS/"
+                "ATL03/006/2019/11/30/ATL03_20191130221008_09930503_006_01.h5"
+            ),
+            [
+                (
+                    "s3://nsidc-cumulus-prod-protected/ATLAS/"
+                    "ATL03/006/2019/11/30/ATL03_20191130221008_09930503_006_01.h5"
+                ),
+            ],
+        ),
         (
-            "./icepyx/core/",
-            "is2*.py",
+            "./icepyx/core/is2*.py",
             ["./icepyx/core/is2ref.py"],
         ),
-        (
-            "./icepyx/",
-            "bogus_glob",
+        (  # check bad input
+            "./icepyx/bogus_glob",
             [],
         ),
     ],
 )
-def test_parse_source(dir, fn_glob, expect):
-    filelist = read._parse_source(dir + fn_glob, glob_kwargs={"recursive": True})
+def test_parse_source(source, expect):
+    filelist = read._parse_source(source, glob_kwargs={"recursive": True})
     assert (sorted(filelist)) == expect
 
 

From 12e6f420b99019af4462e70de8df92cd17c59cfb Mon Sep 17 00:00:00 2001
From: Jessica Scheick <jbscheick@gmail.com>
Date: Mon, 29 Jan 2024 17:21:14 -0500
Subject: [PATCH 07/22] remove unneeded commented tests

---
 icepyx/tests/test_read.py | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/icepyx/tests/test_read.py b/icepyx/tests/test_read.py
index fe8d6406f..d6431cc67 100644
--- a/icepyx/tests/test_read.py
+++ b/icepyx/tests/test_read.py
@@ -100,18 +100,3 @@ def test_get_track_type_str(
         exp_spot_dim_name,
         exp_spot_var_name,
     )
-
-
-# Best way to test this may be by including a small sample file with the repo
-# (which can be used for testing some of the catalog/read-in functions as well)
-# def test_invalid_filename_pattern_in_file():
-#     ermesg = "Your input filename does not match the specified pattern."
-# default_pattern = Read("/path/to/valid/source/file")._filename_pattern
-#     with pytest.raises(AssertionError, match=ermesg):
-#         read._validate_source('/valid/filepath/with/non-default/filename/pattern.h5', default_pattern)
-
-# def test_invalid_filename_pattern_in_dir():
-#     ermesg = "None of your filenames match the specified pattern."
-#     default_pattern = Read("/path/to/valid/dir/")._filename_pattern
-#     with pytest.raises(AssertionError, match=ermesg):
-#         read._validate_source('/valid/dirpath/with/non-default/filename/pattern.h5', default_pattern)

From f75fca5ac6b4014f1f6ba537798bb9c552d2ce4e Mon Sep 17 00:00:00 2001
From: Jessica Scheick <jbscheick@gmail.com>
Date: Mon, 29 Jan 2024 17:42:24 -0500
Subject: [PATCH 08/22] update bad input type test

---
 icepyx/core/read.py       |  5 +++--
 icepyx/tests/test_read.py | 20 +++++++++-----------
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/icepyx/core/read.py b/icepyx/core/read.py
index 7582dbd0a..367843f3d 100644
--- a/icepyx/core/read.py
+++ b/icepyx/core/read.py
@@ -100,9 +100,9 @@ def _get_track_type_str(grp_path) -> (str, str, str):
     return track_str, spot_dim_name, spot_var_name
 
 
-def _parse_source(data_source, glob_kwargs) -> list:
+def _parse_source(data_source, glob_kwargs=None) -> list:
     """
-    Parse the data_source input based on type.
+    Parse the user's data_source input based on type.
 
     Returns
     -------
@@ -113,6 +113,7 @@ def _parse_source(data_source, glob_kwargs) -> list:
     from pathlib import Path
 
     if isinstance(data_source, list):
+        assert [isinstance(f, (str, Path)) for f in data_source]
         # if data_source is a list pass that directly to _filelist
         filelist = data_source
     elif os.path.isdir(data_source):
diff --git a/icepyx/tests/test_read.py b/icepyx/tests/test_read.py
index d6431cc67..50182e37c 100644
--- a/icepyx/tests/test_read.py
+++ b/icepyx/tests/test_read.py
@@ -4,17 +4,15 @@
 import icepyx.core.read as read
 
 
-def test_validate_source_str_given_as_list():
-    ermesg = "You must enter your input as a string."
-    with pytest.raises(AssertionError, match=ermesg):
-        read._validate_source(["/path/to/valid/ATL06_file.py"])
-
-
-def test_validate_source_str_not_a_dir_or_file():
-    ermesg = "Your data source string is not a valid data source."
-    with pytest.raises(AssertionError, match=ermesg):
-        read._validate_source("./fake/dirpath")
-        read._validate_source("./fake_file.h5")
+# note isdir will issue a TypeError if a tuple is passed
+def test_parse_source_bad_input_type():
+    ermesg = (
+        "data_source should be a list of files, a directory, the path to a file, "
+        "or a glob string."
+    )
+    with pytest.raises(TypeError, match=ermesg):
+        read._parse_source(150)
+        read._parse_source({"myfiles": "./my_valid_path/file.h5"})
 
 
 @pytest.mark.parametrize(

From f1fa1b32e4289a60a4d7c5895249ee0853c6b690 Mon Sep 17 00:00:00 2001
From: Jessica Scheick <jbscheick@gmail.com>
Date: Mon, 29 Jan 2024 17:51:22 -0500
Subject: [PATCH 09/22] remove some other notes and old comments

---
 icepyx/core/read.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/icepyx/core/read.py b/icepyx/core/read.py
index 367843f3d..c0e8fa8d8 100644
--- a/icepyx/core/read.py
+++ b/icepyx/core/read.py
@@ -635,8 +635,6 @@ def _build_dataset_template(self, file):
 
         It may be possible to expand this function to provide multiple templates.
         """
-        # NOTE: use the hdf5 library to grab the attr for the product specifier
-        # can ultimately then use it to check against user specified one or merge strategies (or to return a list of ds)
 
         is2ds = xr.Dataset(
             coords=dict(

From 6a1bcedb2c98d53088c94e6b2ef9469c8ba5293c Mon Sep 17 00:00:00 2001
From: Jessica Scheick <jbscheick@gmail.com>
Date: Mon, 29 Jan 2024 18:20:59 -0500
Subject: [PATCH 10/22] remove unneeded import

---
 icepyx/tests/test_read.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/icepyx/tests/test_read.py b/icepyx/tests/test_read.py
index 50182e37c..c6fb50ea7 100644
--- a/icepyx/tests/test_read.py
+++ b/icepyx/tests/test_read.py
@@ -1,6 +1,5 @@
 import pytest
 
-from icepyx.core.read import Read
 import icepyx.core.read as read
 
 

From 2ba4cd3c3b2b55cdb0e959b8f6114b7053b939d6 Mon Sep 17 00:00:00 2001
From: Jessica Scheick <jbscheick@gmail.com>
Date: Mon, 29 Jan 2024 19:06:22 -0500
Subject: [PATCH 11/22] shorten a few lines for linting

---
 icepyx/core/read.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/icepyx/core/read.py b/icepyx/core/read.py
index c0e8fa8d8..d40a41e16 100644
--- a/icepyx/core/read.py
+++ b/icepyx/core/read.py
@@ -36,7 +36,8 @@ def _make_np_datetime(df, keyword):
 
     Example
     -------
-    >>> ds = xr.Dataset({"time": ("time_idx", [b'2019-01-11T05:26:31.323722Z'])}, coords={"time_idx": [0]})
+    >>> ds = xr.Dataset({"time": ("time_idx", [b'2019-01-11T05:26:31.323722Z'])},
+    >>>                  coords={"time_idx": [0]})
     >>> _make_np_datetime(ds, "time")
     <xarray.Dataset>
     Dimensions:   (time_idx: 1)
@@ -48,7 +49,8 @@ def _make_np_datetime(df, keyword):
     """
 
     if df[keyword].str.endswith("Z"):
-        # manually remove 'Z' from datetime to allow conversion to np.datetime64 object (support for timezones is deprecated and causes a seg fault)
+        # manually remove 'Z' from datetime to allow conversion to np.datetime64 object
+        # (support for timezones is deprecated and causes a seg fault)
         df.update({keyword: df[keyword].str[:-1].astype(np.datetime64)})
 
     else:
@@ -170,7 +172,8 @@ class Read(EarthdataAuthMixin):
         The List must be a list of strings, each of which is the path of a single file.
 
     glob_kwargs : dict, default {}
-        Additional arguments to be passed into the [glob.glob()](https://docs.python.org/3/library/glob.html#glob.glob)function
+        Additional arguments to be passed into the
+        [glob.glob()](https://docs.python.org/3/library/glob.html#glob.glob)function
 
     out_obj_type : object, default xarray.Dataset
         The desired format for the data to be read in.
@@ -311,7 +314,8 @@ def __init__(
     def vars(self):
         """
         Return the variables object associated with the data being read in.
-        This instance is generated from the source file or first file in a list of input files (when source is a directory).
+        This instance is generated from the source file or first file in a list of input files
+        (when source is a directory).
 
         See Also
         --------

From 420acb3c850f3ea1011af56683d3400e1337f3cd Mon Sep 17 00:00:00 2001
From: Jessica Scheick <jbscheick@gmail.com>
Date: Tue, 30 Jan 2024 11:03:24 -0500
Subject: [PATCH 12/22] use ellipses in two-line docstring code example

---
 icepyx/core/read.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icepyx/core/read.py b/icepyx/core/read.py
index d40a41e16..f2cbd53ad 100644
--- a/icepyx/core/read.py
+++ b/icepyx/core/read.py
@@ -37,7 +37,7 @@ def _make_np_datetime(df, keyword):
     Example
     -------
     >>> ds = xr.Dataset({"time": ("time_idx", [b'2019-01-11T05:26:31.323722Z'])},
-    >>>                  coords={"time_idx": [0]})
+    ...                  coords={"time_idx": [0]})
     >>> _make_np_datetime(ds, "time")
     <xarray.Dataset>
     Dimensions:   (time_idx: 1)

From 487f5a500256ad1e4ae255c34ea1396025527f18 Mon Sep 17 00:00:00 2001
From: Jessica Scheick <jbscheick@gmail.com>
Date: Thu, 1 Feb 2024 13:55:59 -0500
Subject: [PATCH 13/22] remove previously removed modules from docs

---
 doc/source/user_guide/documentation/components.rst | 8 --------
 doc/source/user_guide/documentation/query.rst      | 1 -
 2 files changed, 9 deletions(-)

diff --git a/doc/source/user_guide/documentation/components.rst b/doc/source/user_guide/documentation/components.rst
index dea41a970..93cfb0397 100644
--- a/doc/source/user_guide/documentation/components.rst
+++ b/doc/source/user_guide/documentation/components.rst
@@ -26,14 +26,6 @@ granules
    :members:
    :undoc-members:
    :show-inheritance:
-
-is2cat
-------
-
-.. automodule:: icepyx.core.is2cat
-   :members:
-   :undoc-members:
-   :show-inheritance:
    
 is2ref
 ------
diff --git a/doc/source/user_guide/documentation/query.rst b/doc/source/user_guide/documentation/query.rst
index 168986548..9973c2dab 100644
--- a/doc/source/user_guide/documentation/query.rst
+++ b/doc/source/user_guide/documentation/query.rst
@@ -23,7 +23,6 @@ Attributes
    Query.cycles
    Query.dates
    Query.end_time
-   Query.file_vars
    Query.granules
    Query.order_vars
    Query.product

From 86a4ad70db514c82275a29142492041ceac604e1 Mon Sep 17 00:00:00 2001
From: Jessica Scheick <jbscheick@gmail.com>
Date: Mon, 5 Feb 2024 22:04:14 -0500
Subject: [PATCH 14/22] fix longline errors except for deprecations

---
 icepyx/core/read.py | 47 +++++++++++++++++++++++++++++----------------
 1 file changed, 30 insertions(+), 17 deletions(-)

diff --git a/icepyx/core/read.py b/icepyx/core/read.py
index f2cbd53ad..03d9228a1 100644
--- a/icepyx/core/read.py
+++ b/icepyx/core/read.py
@@ -1,4 +1,3 @@
-import fnmatch
 import glob
 import os
 import sys
@@ -208,7 +207,8 @@ class Read(EarthdataAuthMixin):
     Reading all files in a directory
     >>> ipx.Read('/path/to/data/') # doctest: +SKIP
 
-    Reading files that match a particular pattern (here, all .h5 files that start with `processed_ATL06_`).
+    Reading files that match a particular pattern
+    (here, all .h5 files that start with `processed_ATL06_`).
     >>> ipx.Read('/path/to/data/processed_ATL06_*.h5') # doctest: +SKIP
 
     Reading a specific list of files
@@ -368,7 +368,8 @@ def _add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict):
             the second list contains the second portion of the group name, etc.
             "none" is used to fill in where paths are shorter than the longest path.
         wanted_dict : dict
-            Dictionary with variable names as keys and a list of group + variable paths containing those variables as values.
+            Dictionary with variable names as keys and a list of group +
+            variable paths containing those variables as values.
 
         Returns
         -------
@@ -461,7 +462,8 @@ def _add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict):
                     )
                 )
 
-                # for the subgoups where there is 1d delta time data, make sure that the cycle number is still a coordinate for merging
+                # for the subgoups where there is 1d delta time data,
+                # make sure that the cycle number is still a coordinate for merging
                 try:
                     ds = ds.assign_coords(
                         {
@@ -504,14 +506,16 @@ def _combine_nested_vars(is2ds, ds, grp_path, wanted_dict):
         grp_path : str
             hdf5 group path read into ds
         wanted_dict : dict
-            Dictionary with variable names as keys and a list of group + variable paths containing those variables as values.
+            Dictionary with variable names as keys and a list of group +
+            variable paths containing those variables as values.
 
         Returns
         -------
         Xarray Dataset with variables from the ds variable group added.
         """
 
-        # Dev Goal: improve this type of iterating to minimize amount of looping required. Would a path handling library be useful here?
+        # Dev Goal: improve this type of iterating to minimize amount of looping required.
+        # Would a path handling library be useful here?
         grp_spec_vars = [
             k for k, v in wanted_dict.items() if any(f"{grp_path}/{k}" in x for x in v)
         ]
@@ -543,7 +547,8 @@ def _combine_nested_vars(is2ds, ds, grp_path, wanted_dict):
 
     def load(self):
         """
-        Create a single Xarray Dataset containing the data from one or more files and/or ground tracks.
+        Create a single Xarray Dataset containing the data from one or more
+        files and/or ground tracks.
         Uses icepyx's ICESat-2 data product awareness and Xarray's `combine_by_coords` function.
 
         All items in the wanted variables list will be loaded from the files into memory.
@@ -657,7 +662,8 @@ def _read_single_grp(self, file, grp_path):
         ----------
         file : str
             Full path to ICESat-2 data file.
-            Currently tested for locally downloaded files; untested but hopefully works for s3 stored cloud files.
+            Currently tested for locally downloaded files;
+            untested but hopefully works for s3 stored cloud files.
         grp_path : str
             Full string to a variable group.
             E.g. 'gt1l/land_ice_segments'
@@ -677,23 +683,27 @@ def _read_single_grp(self, file, grp_path):
 
     def _build_single_file_dataset(self, file, groups_list):
         """
-        Create a single xarray dataset with all of the wanted variables/groups from the wanted var list for a single data file/url.
+        Create a single xarray dataset with all of the wanted variables/groups
+        from the wanted var list for a single data file/url.
 
         Parameters
         ----------
         file : str
             Full path to ICESat-2 data file.
-            Currently tested for locally downloaded files; untested but hopefully works for s3 stored cloud files.
+            Currently tested for locally downloaded files;
+            untested but hopefully works for s3 stored cloud files.
 
         groups_list : list of strings
             List of full paths to data variables within the file.
-            e.g. ['orbit_info/sc_orient', 'gt1l/land_ice_segments/h_li', 'gt1l/land_ice_segments/latitude', 'gt1l/land_ice_segments/longitude']
+            e.g. ['orbit_info/sc_orient', 'gt1l/land_ice_segments/h_li',
+            'gt1l/land_ice_segments/latitude', 'gt1l/land_ice_segments/longitude']
 
         Returns
         -------
         Xarray Dataset
         """
-        # DEVNOTE: if and elif does not actually apply wanted variable list, and has not been tested for merging multiple files into one ds
+        # DEVNOTE: if and elif does not actually apply wanted variable list,
+        # and has not been tested for merging multiple files into one ds
         # if a gridded product
         # TODO: all products need to be tested, and quicklook products added or explicitly excluded
         # Level 3b, gridded (netcdf): ATL14, 15, 16, 17, 18, 19, 20, 21
@@ -720,13 +730,14 @@ def _build_single_file_dataset(self, file, groups_list):
             )
             wanted_groups_set = set(wanted_groups)
 
-            # orbit_info is used automatically as the first group path so the info is available for the rest of the groups
+            # orbit_info is used automatically as the first group path
+            # so the info is available for the rest of the groups
             # wanted_groups_set.remove("orbit_info")
             wanted_groups_set.remove("ancillary_data")
             # Note: the sorting is critical for datasets with highly nested groups
             wanted_groups_list = ["ancillary_data"] + sorted(wanted_groups_set)
 
-            # returns the wanted groups as a list of lists with group path string elements separated
+            # returns wanted groups as a list of lists with group path string elements separated
             _, wanted_groups_tiered = Variables.parse_var_list(
                 groups_list, tiered=True, tiered_vars=True
             )
@@ -751,14 +762,15 @@ def _build_single_file_dataset(self, file, groups_list):
                 groups_list, tiered=False
             )
             wanted_groups_set = set(wanted_groups)
-            # orbit_info is used automatically as the first group path so the info is available for the rest of the groups
+            # orbit_info is used automatically as the first group path
+            # so the info is available for the rest of the groups
             wanted_groups_set.remove("orbit_info")
             wanted_groups_set.remove("ancillary_data")
             # Note: the sorting is critical for datasets with highly nested groups
             wanted_groups_list = ["orbit_info", "ancillary_data"] + sorted(
                 wanted_groups_set
             )
-            # returns the wanted groups as a list of lists with group path string elements separated
+            # returns wanted groups as a list of lists with group path string elements separated
             _, wanted_groups_tiered = Variables.parse_var_list(
                 groups_list, tiered=True, tiered_vars=True
             )
@@ -771,7 +783,8 @@ def _build_single_file_dataset(self, file, groups_list):
                     is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict
                 )
 
-                # if there are any deeper nested variables, get those so they have actual coordinates and add them
+                # if there are any deeper nested variables,
+                # get those so they have actual coordinates and add them
                 # this may apply to (at a minimum): ATL08
                 if any(grp_path in grp_path2 for grp_path2 in wanted_groups_list):
                     for grp_path2 in wanted_groups_list:

From 734a55e79cbf3499d4e0e8769eb0bd133ea8f30f Mon Sep 17 00:00:00 2001
From: Jessica Scheick <jbscheick@gmail.com>
Date: Mon, 5 Feb 2024 22:07:13 -0500
Subject: [PATCH 15/22] remove unused imports

---
 icepyx/tests/test_validate_inputs.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/icepyx/tests/test_validate_inputs.py b/icepyx/tests/test_validate_inputs.py
index b776a7fc8..4d0ea0bd5 100644
--- a/icepyx/tests/test_validate_inputs.py
+++ b/icepyx/tests/test_validate_inputs.py
@@ -1,7 +1,4 @@
 import pytest
-import warnings
-import datetime as dt
-import numpy as np
 
 import icepyx.core.validate_inputs as val
 

From 8fa662d4181758e10fff9d5bf45d45232940624c Mon Sep 17 00:00:00 2001
From: Rachel Wegener <rachel.e.wegener@gmail.com>
Date: Thu, 8 Feb 2024 15:23:11 +0000
Subject: [PATCH 16/22] move check 0 files to parse source

---
 icepyx/core/read.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/icepyx/core/read.py b/icepyx/core/read.py
index 03d9228a1..398fee8bd 100644
--- a/icepyx/core/read.py
+++ b/icepyx/core/read.py
@@ -137,6 +137,13 @@ def _parse_source(data_source, glob_kwargs=None) -> list:
     # Remove any directories from the list (these get generated during recursive
     # glob search)
     filelist = [f for f in filelist if not os.path.isdir(f)]
+    
+    # Make sure a non-zero number of files were found
+    if len(filelist) == 0:
+        raise KeyError(
+                "No files found matching the specified `data_source`. Check your glob "
+                "string or file list."
+            )
 
     return filelist
 
@@ -261,6 +268,8 @@ def __init__(
             # If the path is an s3 path set the respective element of self.is_s3 to True
             if file_.startswith("s3"):
                 self.is_s3[i] = True
+            # set up 
+            if True in self.is_s3:
                 auth = self.auth
             else:
                 auth = None
@@ -283,7 +292,7 @@ def __init__(
             )
             _confirm_proceed()
 
-        # Raise warnings or errors for multiple products or products not matching the user-specified product
+        # Raise error if multiple products given
         all_products = list(set(product_dict.values()))
         if len(all_products) > 1:
             raise TypeError(
@@ -291,14 +300,9 @@ def __init__(
                 "Please provide a valid `data_source` parameter indicating files of a single "
                 "product"
             )
-        elif len(all_products) == 0:
-            raise TypeError(
-                "No files found matching the specified `data_source`. Check your glob "
-                "string or file list."
-            )
-        else:
-            # Assign the identified product to the property
-            self._product = all_products[0]
+        
+        # Assign the identified product to the property
+        self._product = all_products[0]
 
         if out_obj_type is not None:
             print(

From a9a46c5022ffe92d711b0a9ab9ee3e49e0c3fcdf Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 8 Feb 2024 15:23:30 +0000
Subject: [PATCH 17/22] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 icepyx/core/read.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/icepyx/core/read.py b/icepyx/core/read.py
index 398fee8bd..80caaf6d8 100644
--- a/icepyx/core/read.py
+++ b/icepyx/core/read.py
@@ -137,13 +137,13 @@ def _parse_source(data_source, glob_kwargs=None) -> list:
     # Remove any directories from the list (these get generated during recursive
     # glob search)
     filelist = [f for f in filelist if not os.path.isdir(f)]
-    
+
     # Make sure a non-zero number of files were found
     if len(filelist) == 0:
         raise KeyError(
-                "No files found matching the specified `data_source`. Check your glob "
-                "string or file list."
-            )
+            "No files found matching the specified `data_source`. Check your glob "
+            "string or file list."
+        )
 
     return filelist
 
@@ -268,7 +268,7 @@ def __init__(
             # If the path is an s3 path set the respective element of self.is_s3 to True
             if file_.startswith("s3"):
                 self.is_s3[i] = True
-            # set up 
+            # set up
             if True in self.is_s3:
                 auth = self.auth
             else:
@@ -300,7 +300,7 @@ def __init__(
                 "Please provide a valid `data_source` parameter indicating files of a single "
                 "product"
             )
-        
+
         # Assign the identified product to the property
         self._product = all_products[0]
 

From 69717ad3c960f82f5378128d3a55a73466020d5f Mon Sep 17 00:00:00 2001
From: Rachel Wegener <rachel.e.wegener@gmail.com>
Date: Thu, 8 Feb 2024 15:26:22 +0000
Subject: [PATCH 18/22] remove a line I didn't mean to commit

---
 icepyx/core/read.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/icepyx/core/read.py b/icepyx/core/read.py
index 398fee8bd..41293dcf6 100644
--- a/icepyx/core/read.py
+++ b/icepyx/core/read.py
@@ -268,8 +268,6 @@ def __init__(
             # If the path is an s3 path set the respective element of self.is_s3 to True
             if file_.startswith("s3"):
                 self.is_s3[i] = True
-            # set up 
-            if True in self.is_s3:
                 auth = self.auth
             else:
                 auth = None

From c7cc03ed627fe22a37d51abeb8878cf51ccd634a Mon Sep 17 00:00:00 2001
From: Rachel Wegener <rachel.e.wegener@gmail.com>
Date: Thu, 8 Feb 2024 15:49:53 +0000
Subject: [PATCH 19/22] fix syntax error found while testing

---
 icepyx/core/is2ref.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/icepyx/core/is2ref.py b/icepyx/core/is2ref.py
index c51c631be..66ceaec1c 100644
--- a/icepyx/core/is2ref.py
+++ b/icepyx/core/is2ref.py
@@ -378,8 +378,10 @@ def extract_product(filepath, auth=None):
             # ATL14 saves the short_name as an array ['ATL14']
             product = product[0]
         product = _validate_product(product)
-    except KeyError:
-        raise "Unable to parse the product name from file metadata"
+    except KeyError as e:
+        raise Exception(
+            "Unable to parse the product name from file metadata"
+        ).with_traceback(e.__traceback__)
 
     # Close the file reader
     f.close()
@@ -421,8 +423,10 @@ def extract_version(filepath, auth=None):
         if isinstance(version, bytes):
             version = version.decode()
 
-    except KeyError:
-        raise "Unable to parse the version from file metadata"
+    except KeyError as e:
+        raise Exception(
+            "Unable to parse the version from file metadata"
+        ).with_traceback(e.__traceback__)
 
     # Close the file reader
     f.close()

From a64c5f99e4ac2569d75ecf8cd14a483b60da55ac Mon Sep 17 00:00:00 2001
From: GitHub Action <rwegener2@users.noreply.github.com>
Date: Thu, 8 Feb 2024 15:53:28 +0000
Subject: [PATCH 20/22] GitHub action UML generation auto-update

---
 doc/source/user_guide/documentation/classes_dev_uml.svg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/user_guide/documentation/classes_dev_uml.svg b/doc/source/user_guide/documentation/classes_dev_uml.svg
index 09c112f5c..84fb2a450 100644
--- a/doc/source/user_guide/documentation/classes_dev_uml.svg
+++ b/doc/source/user_guide/documentation/classes_dev_uml.svg
@@ -235,7 +235,7 @@
 <polygon fill="none" stroke="black" points="1655.5,-496.5 1655.5,-760.5 2271.5,-760.5 2271.5,-496.5 1655.5,-496.5"/>
 <text text-anchor="start" x="1945" y="-745.3" font-family="Times,serif" font-size="14.00">Read</text>
 <polyline fill="none" stroke="black" points="1655.5,-737.5 2271.5,-737.5 "/>
-<text text-anchor="start" x="1899.5" y="-722.3" font-family="Times,serif" font-size="14.00">_filelist : list</text>
+<text text-anchor="start" x="1899.5" y="-722.3" font-family="Times,serif" font-size="14.00">_filelist</text>
 <text text-anchor="start" x="1899.5" y="-707.3" font-family="Times,serif" font-size="14.00">_out_obj : Dataset</text>
 <text text-anchor="start" x="1899.5" y="-692.3" font-family="Times,serif" font-size="14.00">_product</text>
 <text text-anchor="start" x="1899.5" y="-677.3" font-family="Times,serif" font-size="14.00">_read_vars</text>

From cae97016e1ce5ba6796f5944e8c5ce146e975284 Mon Sep 17 00:00:00 2001
From: Jessica Scheick <jbscheick@gmail.com>
Date: Thu, 8 Feb 2024 14:56:18 -0500
Subject: [PATCH 21/22] fix no files test

---
 icepyx/tests/test_read.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/icepyx/tests/test_read.py b/icepyx/tests/test_read.py
index c6fb50ea7..20807c410 100644
--- a/icepyx/tests/test_read.py
+++ b/icepyx/tests/test_read.py
@@ -14,6 +14,15 @@ def test_parse_source_bad_input_type():
         read._parse_source({"myfiles": "./my_valid_path/file.h5"})
 
 
+def test_parse_source_no_files():
+    ermesg = (
+        "No files found matching the specified `data_source`. Check your glob "
+        "string or file list."
+    )
+    with pytest.raises(KeyError, match=ermesg):
+        read._parse_source("./icepyx/bogus_glob")
+
+
 @pytest.mark.parametrize(
     "source, expect",
     [
@@ -66,10 +75,6 @@ def test_parse_source_bad_input_type():
             "./icepyx/core/is2*.py",
             ["./icepyx/core/is2ref.py"],
         ),
-        (  # check bad input
-            "./icepyx/bogus_glob",
-            [],
-        ),
     ],
 )
 def test_parse_source(source, expect):

From f8b7f0b592e0e33ece2e40cb2d699a4fabce826b Mon Sep 17 00:00:00 2001
From: Jessica Scheick <jbscheick@gmail.com>
Date: Thu, 8 Feb 2024 15:57:00 -0500
Subject: [PATCH 22/22] update default glob_kwargs to dict

---
 icepyx/core/read.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icepyx/core/read.py b/icepyx/core/read.py
index ed8871637..8cc5afd21 100644
--- a/icepyx/core/read.py
+++ b/icepyx/core/read.py
@@ -101,7 +101,7 @@ def _get_track_type_str(grp_path) -> (str, str, str):
     return track_str, spot_dim_name, spot_var_name
 
 
-def _parse_source(data_source, glob_kwargs=None) -> list:
+def _parse_source(data_source, glob_kwargs={}) -> list:
     """
     Parse the user's data_source input based on type.