From 1d533417927d735f05882f82ef379898288c5ee4 Mon Sep 17 00:00:00 2001
From: Jessica Scheick
Date: Mon, 25 Sep 2023 11:45:10 -0400
Subject: [PATCH 01/14] update QUEST and GenQuery classes for argo integration (#441)

* Adding argo search and download script
* Create get_argo.py Download the 'classic' argo data with physical variables only
* begin implementing argo dataset
* 1st draft implementing argo dataset
* implement search_data for physical argo
* doctests and general cleanup for physical argo query
* beginning of BGC Argo download
* parse BGC profiles into DF
* plan to query BGC profiles
* validate BGC param input function
* order BGC params in order in which they should be queried
* fix bug in parse_into_df() - init blank df to take in union of params from all profiles
* identify profiles from initial API request containing all required params
* creates df with only profiles that contain all user specified params Need to dload additional params
* modified to populate prof df by querying individual profiles
* finished up BGC argo download!
* assert bounding box type in Argo init, begin framework for unit tests
* need to confirm spatial extent is bbox
* begin test case for available profiles
* add tests for argo.py
* add typing, add example json, and use it to test parsing
* update argo to submit successful api request (update keys and values submitted)
* first pass at porting argo over to metadata+per profile download (WIP)
* basic working argo script
* simplify parameter validation (ordered list no longer needed)
* add option to delete existing data before new download
* continue cleaning up argo.py
* fix download_by_profile to properly store all downloaded data
* remove old get_argo.py script
* remove _filter_profiles function in favor of submitting data kwarg in request
* start filling in docstrings
* clean up nearly duplicate functions
* add more docstrings
* get a few minimal argo tests working
* add bgc argo params.
begin adding merge for second download runs * some changes * WIP test commit to see if can push to GH * WIP handling argo merge issue * update profile to df to return df and move merging to get_dataframe * merge profiles with existing df * clean up docstrings and code * add test_argo.py * add prelim test case for adding to Argo df * remove sandbox files * remove bgc argo test file * update variables notebook from development * simplify import statements * quickfix for granules error * draft subpage on available QUEST datasets * small reference fix in text * add reference to top of .rst file * test argo df merge * add functionality to Quest class to pass search criteria to all datasets * add functionality to Quest class to pass search criteria to all datasets * update dataset docstrings; reorder argo.py to match * implement quest search+download for IS2 * move spatial and temporal properties from query to genquery * add query docstring test for cycles,tracks to test file * add quest test module * standardize print outputs for quest search and download; is2 download needs auth updates * remove extra files from this branch * comment out argo portions of quest for PR * remove argo-branch-only init file * remove argo script from branch * remove argo test file from branch * comment out another line of argo stuff * Update quest.py Added Docstrings to functions within quest.py and edited the primary docstring for the QUEST class here. Note I did not add Docstrings to the implicit __self__ function. * Update test_quest.py Added comments (not Docstrings) to test functions * Update dataset.py Minor edits to the doc strings * Update quest.py Edited docstrings * catch error with downloading datasets in Quest; template test case for multi dataset query --------- Co-authored-by: Kelsey Bisson <48059682+kelseybisson@users.noreply.github.com> Co-authored-by: Romina Co-authored-by: zachghiaccio Co-authored-by: Zach Fair <48361714+zachghiaccio@users.noreply.github.com> --- .../contributing/quest-available-datasets.rst | 25 ++ icepyx/core/query.py | 345 +++++++++--------- icepyx/quest/__init__.py | 0 icepyx/quest/dataset_scripts/dataset.py | 90 +++-- icepyx/quest/quest.py | 104 +++++- icepyx/tests/test_query.py | 12 + icepyx/tests/test_quest.py | 80 ++++ 7 files changed, 424 insertions(+), 232 deletions(-) create mode 100644 doc/source/contributing/quest-available-datasets.rst delete mode 100644 icepyx/quest/__init__.py create mode 100644 icepyx/tests/test_quest.py diff --git a/doc/source/contributing/quest-available-datasets.rst b/doc/source/contributing/quest-available-datasets.rst new file mode 100644 index 000000000..91a6283a0 --- /dev/null +++ b/doc/source/contributing/quest-available-datasets.rst @@ -0,0 +1,25 @@ +.. _quest_supported_label: + +QUEST Supported Datasets +======================== + +On this page, we outline the datasets that are supported by the QUEST module. Click on the links for each dataset to view information about the API and sensor/data platform used. + + +List of Datasets +---------------- + +* `Argo `_ + * The Argo mission involves a series of floats that are designed to capture vertical ocean profiles of temperature, salinity, and pressure down to ~2000 m. Some floats are in support of BGC-Argo, which also includes data relevant for biogeochemical applications: oxygen, nitrate, chlorophyll, backscatter, and solar irradiance. 
+  * (Link Kelsey's paper here)
+  * (Link to example workbook here)
+
+
+Adding a Dataset to QUEST
+-------------------------
+
+Want to add a new dataset to QUEST? No problem! QUEST includes a template script (``dataset.py``) that may be used to create your own querying module for a dataset of interest.
+
+Guidelines on how to construct your dataset module may be found here: (link to be added)
+
+Once you have developed a script with the template, you may request that the module be added to QUEST via GitHub. Please see the How to Contribute page :ref:`dev_guide_label` for instructions on how to contribute to icepyx.
\ No newline at end of file
diff --git a/icepyx/core/query.py b/icepyx/core/query.py
index e8f1d8e7c..3459fd132 100644
--- a/icepyx/core/query.py
+++ b/icepyx/core/query.py
@@ -12,11 +12,9 @@
 import icepyx.core.APIformatting as apifmt
 from icepyx.core.auth import EarthdataAuthMixin
 import icepyx.core.granules as granules
-from icepyx.core.granules import Granules as Granules
+# QUESTION: why doesn't from granules import Granules work, since granules=icepyx.core.granules?
+from icepyx.core.granules import Granules
 import icepyx.core.is2ref as is2ref
-
-# QUESTION: why doesn't from granules import Granules as Granules work, since granules=icepyx.core.granules?
-# from icepyx.core.granules import Granules
 import icepyx.core.spatial as spat
 import icepyx.core.temporal as tp
 import icepyx.core.validate_inputs as val
@@ -148,6 +146,177 @@ def __str__(self):
         )
         return str
 
+    # ----------------------------------------------------------------------
+    # Properties
+
+    @property
+    def temporal(self):
+        """
+        Return the Temporal object containing date/time range information for the query object.
+
+        See Also
+        --------
+        temporal.Temporal.start
+        temporal.Temporal.end
+        temporal.Temporal
+
+        Examples
+        --------
+        >>> reg_a = GenQuery([-55, 68, -48, 71],['2019-02-20','2019-02-28'])
+        >>> print(reg_a.temporal)
+        Start date and time: 2019-02-20 00:00:00
+        End date and time: 2019-02-28 23:59:59
+
+        >>> reg_a = GenQuery([-55, 68, -48, 71])
+        >>> print(reg_a.temporal)
+        ['No temporal parameters set']
+        """
+
+        if hasattr(self, "_temporal"):
+            return self._temporal
+        else:
+            return ["No temporal parameters set"]
+
+    @property
+    def spatial(self):
+        """
+        Return the spatial object, which provides the underlying functionality for validating
+        and formatting geospatial objects. The spatial object has several properties to enable
+        user access to the stored spatial extent in multiple formats.
+
+        See Also
+        --------
+        spatial.Spatial.spatial_extent
+        spatial.Spatial.extent_type
+        spatial.Spatial.extent_file
+        spatial.Spatial
+
+        Examples
+        --------
+        >>> reg_a = ipx.GenQuery([-55, 68, -48, 71],['2019-02-20','2019-02-28'])
+        >>> reg_a.spatial # doctest: +SKIP
+        <icepyx.core.spatial.Spatial at [location]>
+
+        >>> print(reg_a.spatial)
+        Extent type: bounding_box
+        Coordinates: [-55.0, 68.0, -48.0, 71.0]
+
+        """
+        return self._spatial
+
+    @property
+    def spatial_extent(self):
+        """
+        Return an array showing the spatial extent of the query object.
+        Spatial extent is returned as an input type (which depends on how
+        you initially entered your spatial data) followed by the geometry data.
+        Bounding box data is [lower-left-longitude, lower-left-latitude, upper-right-longitude, upper-right-latitude].
+        Polygon data is [longitude1, latitude1, longitude2, latitude2,
+        ... longitude_n,latitude_n, longitude1,latitude1].
+ + Returns + ------- + tuple of length 2 + First tuple element is the spatial type ("bounding box" or "polygon"). + Second tuple element is the spatial extent as a list of coordinates. + + Examples + -------- + + # Note: coordinates returned as float, not int + >>> reg_a = GenQuery([-55, 68, -48, 71],['2019-02-20','2019-02-28']) + >>> reg_a.spatial_extent + ('bounding_box', [-55.0, 68.0, -48.0, 71.0]) + + >>> reg_a = GenQuery([(-55, 68), (-55, 71), (-48, 71), (-48, 68), (-55, 68)],['2019-02-20','2019-02-28']) + >>> reg_a.spatial_extent + ('polygon', [-55.0, 68.0, -55.0, 71.0, -48.0, 71.0, -48.0, 68.0, -55.0, 68.0]) + + # NOTE Is this where we wanted to put the file-based test/example? + # The test file path is: examples/supporting_files/simple_test_poly.gpkg + + See Also + -------- + Spatial.extent + Spatial.extent_type + Spatial.extent_as_gdf + + """ + + return (self._spatial._ext_type, self._spatial._spatial_ext) + + @property + def dates(self): + """ + Return an array showing the date range of the query object. + Dates are returned as an array containing the start and end datetime objects, inclusive, in that order. + + Examples + -------- + >>> reg_a = ipx.GenQuery([-55, 68, -48, 71],['2019-02-20','2019-02-28']) + >>> reg_a.dates + ['2019-02-20', '2019-02-28'] + + >>> reg_a = GenQuery([-55, 68, -48, 71]) + >>> reg_a.dates + ['No temporal parameters set'] + """ + if not hasattr(self, "_temporal"): + return ["No temporal parameters set"] + else: + return [ + self._temporal._start.strftime("%Y-%m-%d"), + self._temporal._end.strftime("%Y-%m-%d"), + ] # could also use self._start.date() + + @property + def start_time(self): + """ + Return the start time specified for the start date. + + Examples + -------- + >>> reg_a = ipx.GenQuery([-55, 68, -48, 71],['2019-02-20','2019-02-28']) + >>> reg_a.start_time + '00:00:00' + + >>> reg_a = ipx.GenQuery([-55, 68, -48, 71],['2019-02-20','2019-02-28'], start_time='12:30:30') + >>> reg_a.start_time + '12:30:30' + + >>> reg_a = GenQuery([-55, 68, -48, 71]) + >>> reg_a.start_time + ['No temporal parameters set'] + """ + if not hasattr(self, "_temporal"): + return ["No temporal parameters set"] + else: + return self._temporal._start.strftime("%H:%M:%S") + + @property + def end_time(self): + """ + Return the end time specified for the end date. + + Examples + -------- + >>> reg_a = ipx.GenQuery([-55, 68, -48, 71],['2019-02-20','2019-02-28']) + >>> reg_a.end_time + '23:59:59' + + >>> reg_a = ipx.GenQuery([-55, 68, -48, 71],['2019-02-20','2019-02-28'], end_time='10:20:20') + >>> reg_a.end_time + '10:20:20' + + >>> reg_a = GenQuery([-55, 68, -48, 71]) + >>> reg_a.end_time + ['No temporal parameters set'] + """ + if not hasattr(self, "_temporal"): + return ["No temporal parameters set"] + else: + return self._temporal._end.strftime("%H:%M:%S") + # DevGoal: update docs throughout to allow for polygon spatial extent # Note: add files to docstring once implemented @@ -333,174 +502,6 @@ def product_version(self): """ return self._version - @property - def temporal(self): - """ - Return the Temporal object containing date/time range information for the query object. 
- - See Also - -------- - temporal.Temporal.start - temporal.Temporal.end - temporal.Temporal - - Examples - -------- - >>> reg_a = Query('ATL06',[-55, 68, -48, 71],['2019-02-20','2019-02-28']) - >>> print(reg_a.temporal) - Start date and time: 2019-02-20 00:00:00 - End date and time: 2019-02-28 23:59:59 - - >>> reg_a = Query('ATL06',[-55, 68, -48, 71],cycles=['03','04','05','06','07'], tracks=['0849','0902']) - >>> print(reg_a.temporal) - ['No temporal parameters set'] - """ - - if hasattr(self, "_temporal"): - return self._temporal - else: - return ["No temporal parameters set"] - - @property - def spatial(self): - """ - Return the spatial object, which provides the underlying functionality for validating - and formatting geospatial objects. The spatial object has several properties to enable - user access to the stored spatial extent in multiple formats. - - See Also - -------- - spatial.Spatial.spatial_extent - spatial.Spatial.extent_type - spatial.Spatial.extent_file - spatial.Spatial - - Examples - -------- - >>> reg_a = ipx.Query('ATL06',[-55, 68, -48, 71],['2019-02-20','2019-02-28']) - >>> reg_a.spatial # doctest: +SKIP - - - >>> print(reg_a.spatial) - Extent type: bounding_box - Coordinates: [-55.0, 68.0, -48.0, 71.0] - - """ - return self._spatial - - @property - def spatial_extent(self): - """ - Return an array showing the spatial extent of the query object. - Spatial extent is returned as an input type (which depends on how - you initially entered your spatial data) followed by the geometry data. - Bounding box data is [lower-left-longitude, lower-left-latitute, upper-right-longitude, upper-right-latitude]. - Polygon data is [longitude1, latitude1, longitude2, latitude2, - ... longitude_n,latitude_n, longitude1,latitude1]. - - Returns - ------- - tuple of length 2 - First tuple element is the spatial type ("bounding box" or "polygon"). - Second tuple element is the spatial extent as a list of coordinates. - - Examples - -------- - - # Note: coordinates returned as float, not int - >>> reg_a = Query('ATL06',[-55, 68, -48, 71],['2019-02-20','2019-02-28']) - >>> reg_a.spatial_extent - ('bounding_box', [-55.0, 68.0, -48.0, 71.0]) - - >>> reg_a = Query('ATL06',[(-55, 68), (-55, 71), (-48, 71), (-48, 68), (-55, 68)],['2019-02-20','2019-02-28']) - >>> reg_a.spatial_extent - ('polygon', [-55.0, 68.0, -55.0, 71.0, -48.0, 71.0, -48.0, 68.0, -55.0, 68.0]) - - # NOTE Is this where we wanted to put the file-based test/example? - # The test file path is: examples/supporting_files/simple_test_poly.gpkg - - See Also - -------- - Spatial.extent - Spatial.extent_type - Spatial.extent_as_gdf - - """ - - return (self._spatial._ext_type, self._spatial._spatial_ext) - - @property - def dates(self): - """ - Return an array showing the date range of the query object. - Dates are returned as an array containing the start and end datetime objects, inclusive, in that order. - - Examples - -------- - >>> reg_a = ipx.Query('ATL06',[-55, 68, -48, 71],['2019-02-20','2019-02-28']) - >>> reg_a.dates - ['2019-02-20', '2019-02-28'] - - >>> reg_a = Query('ATL06',[-55, 68, -48, 71],cycles=['03','04','05','06','07'], tracks=['0849','0902']) - >>> reg_a.dates - ['No temporal parameters set'] - """ - if not hasattr(self, "_temporal"): - return ["No temporal parameters set"] - else: - return [ - self._temporal._start.strftime("%Y-%m-%d"), - self._temporal._end.strftime("%Y-%m-%d"), - ] # could also use self._start.date() - - @property - def start_time(self): - """ - Return the start time specified for the start date. 
- - Examples - -------- - >>> reg_a = ipx.Query('ATL06',[-55, 68, -48, 71],['2019-02-20','2019-02-28']) - >>> reg_a.start_time - '00:00:00' - - >>> reg_a = ipx.Query('ATL06',[-55, 68, -48, 71],['2019-02-20','2019-02-28'], start_time='12:30:30') - >>> reg_a.start_time - '12:30:30' - - >>> reg_a = Query('ATL06',[-55, 68, -48, 71],cycles=['03','04','05','06','07'], tracks=['0849','0902']) - >>> reg_a.start_time - ['No temporal parameters set'] - """ - if not hasattr(self, "_temporal"): - return ["No temporal parameters set"] - else: - return self._temporal._start.strftime("%H:%M:%S") - - @property - def end_time(self): - """ - Return the end time specified for the end date. - - Examples - -------- - >>> reg_a = ipx.Query('ATL06',[-55, 68, -48, 71],['2019-02-20','2019-02-28']) - >>> reg_a.end_time - '23:59:59' - - >>> reg_a = ipx.Query('ATL06',[-55, 68, -48, 71],['2019-02-20','2019-02-28'], end_time='10:20:20') - >>> reg_a.end_time - '10:20:20' - - >>> reg_a = Query('ATL06',[-55, 68, -48, 71],cycles=['03','04','05','06','07'], tracks=['0849','0902']) - >>> reg_a.end_time - ['No temporal parameters set'] - """ - if not hasattr(self, "_temporal"): - return ["No temporal parameters set"] - else: - return self._temporal._end.strftime("%H:%M:%S") - @property def cycles(self): """ diff --git a/icepyx/quest/__init__.py b/icepyx/quest/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/icepyx/quest/dataset_scripts/dataset.py b/icepyx/quest/dataset_scripts/dataset.py index 13e926229..e76081e08 100644 --- a/icepyx/quest/dataset_scripts/dataset.py +++ b/icepyx/quest/dataset_scripts/dataset.py @@ -1,4 +1,5 @@ import warnings +from icepyx.core.query import GenQuery warnings.filterwarnings("ignore") @@ -6,78 +7,75 @@ class DataSet: """ - Parent Class for all supported datasets (i.e. ATL03, ATL07, MODIS, etc.) - all sub classes must support the following methods for use in - colocated data class + Template parent class for all QUEST supported datasets (i.e. ICESat-2, Argo BGC, Argo, MODIS, etc.). + All sub-classes must support the following methods for use via the QUEST class. """ - def __init__(self, boundingbox, timeframe): + def __init__( + self, spatial_extent=None, date_range=None, start_time=None, end_time=None + ): """ - * use existing Icepyx functionality to initialise this - :param timeframe: datetime + Complete any dataset specific initializations (i.e. beyond space and time) required here. + For instance, ICESat-2 requires a product, and Argo requires parameters. + One can also check that the "default" space and time supplied by QUEST are the right format + (e.g. if the spatial extent must be a bounding box). """ - self.bounding_box = boundingbox - self.time_frame = timeframe - - def _fmt_coordinates(self): - # use icepyx geospatial module (icepyx core) raise NotImplementedError - def _fmt_timerange(self): + # ---------------------------------------------------------------------- + # Formatting API Inputs + + def _fmt_coordinates(self): """ - will return list of datetime objects [start_time, end_time] + Convert spatial extent into format needed by DataSet API, + if different than the formats available directly from SuperQuery. """ raise NotImplementedError - # todo: merge with Icepyx SuperQuery - def _validate_input(self): + def _fmt_timerange(self): """ - This may already be done in icepyx. - Not sure if we need this here + Convert temporal information into format needed by DataSet API, + if different than the formats available directly from SuperQuery. 
""" raise NotImplementedError - def search_data(self, delta_t): + # ---------------------------------------------------------------------- + # Validation + + def _validate_inputs(self): """ - query dataset given the spatio temporal criteria - and other params specic to the dataset + Create any additional validation functions for verifying inputs. + This function is not explicitly called by QUEST, + but is frequently needed for preparing API requests. + + See Also + -------- + quest.dataset_scripts.argo.Argo._validate_parameters """ raise NotImplementedError - def download(self, out_path): + # ---------------------------------------------------------------------- + # Querying and Getting Data + + def search_data(self): """ - once data is querried, the user may choose to dowload the - data locally + Query the dataset (i.e. search for available data) + given the spatiotemporal criteria and other parameters specific to the dataset. """ raise NotImplementedError - def visualize(self): + def download(self): """ - (once data is downloaded)?, makes a quick plot showing where - data are located - e.g. Plots location of Argo profile or highlights ATL03 photon track + Download the data to your local machine. """ raise NotImplementedError - def _add2colocated_plot(self): + # ---------------------------------------------------------------------- + # Working with Data + + def visualize(self): """ - Takes visualise() functionality and adds the plot to central - plot with other coincident data. This will be called by - show_area_overlap() in Colocateddata class + Tells QUEST how to plot data (for instance, which parameters to plot) on a basemap. + For ICESat-2, it might show a photon track, and for Argo it might show a profile location. """ raise NotImplementedError - - """ - The following are low priority functions - Not sure these are even worth keeping. Doesn't make sense for - all datasets. - """ - - # def get_meltpond_fraction(self): - # raise NotImplementedError - # - # def get_sea_ice_fraction(self): - # raise NotImplementedError - # - # def get_roughness(self): - # raise NotImplementedError diff --git a/icepyx/quest/quest.py b/icepyx/quest/quest.py index 2855a879c..c54e49b73 100644 --- a/icepyx/quest/quest.py +++ b/icepyx/quest/quest.py @@ -1,25 +1,26 @@ import matplotlib.pyplot as plt -from icepyx.core.query import GenQuery +from icepyx.core.query import GenQuery, Query + +# from icepyx.quest.dataset_scripts.argo import Argo # todo: implement the subclass inheritance class Quest(GenQuery): """ QUEST - Query Unify Explore SpatioTemporal - object to query, obtain, and perform basic - operations on datasets for combined analysis with ICESat-2 data products. - A new dataset can be added using the `dataset.py` template. - A list of already supported datasets is available at: - Expands the icepyx GenQuery superclass. + operations on datasets (i.e. Argo, BGC Argo, MODIS, etc) for combined analysis with ICESat-2 + data products. A new dataset can be added using the `dataset.py` template. + QUEST expands the icepyx GenQuery superclass. See the doc page for GenQuery for details on temporal and spatial input parameters. Parameters ---------- - projection : proj4 string - Not yet implemented - Ex text: a string name of projection to be used for plotting (e.g. 'Mercator', 'NorthPolarStereographic') + proj : proj4 string + Geospatial projection. 
+        Not yet implemented
 
     Returns
     -------
@@ -38,7 +39,6 @@ class Quest(GenQuery):
         Date range: (2019-02-20 00:00:00, 2019-02-28 23:59:59)
         Data sets: None
 
-    # todo: make this work with real datasets
     Add datasets to the quest object.
 
     >>> reg_a.datasets = {'ATL07':None, 'Argo':None}
@@ -61,13 +61,11 @@ def __init__(
         end_time=None,
         proj="Default",
     ):
+        """
+        Tells QUEST to initialize data given the user input spatiotemporal data.
+        """
         super().__init__(spatial_extent, date_range, start_time, end_time)
         self.datasets = {}
-        self.projection = self._determine_proj(proj)
-
-    # todo: maybe move this to icepyx superquery class
-    def _determine_proj(self, proj):
-        return None
 
     def __str__(self):
         str = super(Quest, self).__str__()
@@ -83,4 +81,82 @@ def __str__(self):
 
         return str
 
+    # ----------------------------------------------------------------------
+    # Datasets
+
+    def add_icesat2(
+        self,
+        product=None,
+        start_time=None,
+        end_time=None,
+        version=None,
+        cycles=None,
+        tracks=None,
+        files=None,
+        **kwargs,
+    ):
+        """
+        Adds ICESat-2 datasets to QUEST structure.
+        """
+
+        query = Query(
+            product,
+            self._spatial.extent,
+            [self._temporal.start, self._temporal.end],
+            start_time,
+            end_time,
+            version,
+            cycles,
+            tracks,
+            files,
+            **kwargs,
+        )
+
+        self.datasets["icesat2"] = query
+
+    # def add_argo(self, params=["temperature"], presRange=None):
+
+    #     argo = Argo(self._spatial, self._temporal, params, presRange)
+    #     self.datasets["argo"] = argo
+
+    # ----------------------------------------------------------------------
+    # Methods (on all datasets)
+
+    # error handling? what happens when one of i fails...
+    def search_all(self):
+        """
+        Searches for required dataset within platform (i.e. ICESat-2, Argo) of interest.
+        """
+        print("\nSearching all datasets...")
+
+        for i in self.datasets.values():
+            print()
+            try:
+                # querying ICESat-2 data
+                if isinstance(i, Query):
+                    print("---ICESat-2---")
+                    msg = i.avail_granules()
+                    print(msg)
+                else:  # querying all other data sets
+                    print(i)
+                    i.search_data()
+            except:
+                dataset_name = type(i).__name__
+                print("Error querying data from {0}".format(dataset_name))
+
+    # error handling? what happens when one of i fails...
+    def download_all(self, path=""):
+        """Downloads requested dataset(s)."""
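+        # ICESat-2 (Query) datasets download through download_granules(); any
+        # other dataset object is expected to provide its own download() method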
+        print("\nDownloading all datasets...")
+
+        for i in self.datasets.values():
+            print()
+            if isinstance(i, Query):
+                print("---ICESat-2---")
+                msg = i.download_granules(path)
+                print(msg)
+            else:
+                i.download()
+                print(i)
+
+
+# DEVNOTE: see colocated data branch and phyto team files for code that expands quest functionality
diff --git a/icepyx/tests/test_query.py b/icepyx/tests/test_query.py
index 55b25ef4a..7738c424a 100644
--- a/icepyx/tests/test_query.py
+++ b/icepyx/tests/test_query.py
@@ -41,6 +41,18 @@ def test_icepyx_boundingbox_query():
     assert obs_tuple == exp_tuple
 
 
+def test_temporal_properties_cycles_tracks():
+    reg_a = ipx.Query(
+        "ATL06",
+        [-55, 68, -48, 71],
+        cycles=["03", "04", "05", "06", "07"],
+        tracks=["0849", "0902"],
+    )
+    exp = ["No temporal parameters set"]
+
+    assert all(obs == exp for obs in (reg_a.dates, reg_a.start_time, reg_a.end_time))
+
+
 # Tests need to add (given can't do them within docstrings/they're behind NSIDC login)
 # reqparams post-order
 # product_all_info
diff --git a/icepyx/tests/test_quest.py b/icepyx/tests/test_quest.py
new file mode 100644
index 000000000..043ee159e
--- /dev/null
+++ b/icepyx/tests/test_quest.py
@@ -0,0 +1,80 @@
+import pytest
+import re
+
+import icepyx as ipx
+from icepyx.quest.quest import Quest
+
+
+@pytest.fixture(scope="module", autouse=True)
+def quest_instance():
+    bounding_box = [-150, 30, -120, 60]
+    date_range = ["2022-06-07", "2022-06-14"]
+    my_quest = Quest(spatial_extent=bounding_box, date_range=date_range)
+    return my_quest
+
+
+########## PER-DATASET ADDITION TESTS ##########
+
+# Parameterize these add_dataset tests once more datasets are added
+def test_add_is2(quest_instance):
+    # Add ATL06 as a test to QUEST
+
+    prod = "ATL06"
+    quest_instance.add_icesat2(product=prod)
+    exp_key = "icesat2"
+    exp_type = ipx.Query
+
+    obs = quest_instance.datasets
+
+    assert type(obs) == dict
+    assert exp_key in obs.keys()
+    assert type(obs[exp_key]) == exp_type
+    assert quest_instance.datasets[exp_key].product == prod
+
+
+# def test_add_argo(quest_instance):
+#     params = ["down_irradiance412", "temperature"]
+#     quest_instance.add_argo(params=params)
+#     exp_key = "argo"
+#     exp_type = ipx.quest.dataset_scripts.argo.Argo
+
+#     obs = quest_instance.datasets
+
+#     assert type(obs) == dict
+#     assert exp_key in obs.keys()
+#     assert type(obs[exp_key]) == exp_type
+#     assert quest_instance.datasets[exp_key].params == params
+
+# def test_add_multiple_datasets():
+#     bounding_box = [-150, 30, -120, 60]
+#     date_range = ["2022-06-07", "2022-06-14"]
+#     my_quest = Quest(spatial_extent=bounding_box, date_range=date_range)
+#
+#     # print(my_quest.spatial)
+#     # print(my_quest.temporal)
+#
+#     # my_quest.add_argo(params=["down_irradiance412", "temperature"])
+#     # print(my_quest.datasets["argo"].params)
+#
+#     my_quest.add_icesat2(product="ATL06")
+#     # print(my_quest.datasets["icesat2"].product)
+#
+#     print(my_quest)
+#
+#     # my_quest.search_all()
+#     #
+#     # # this one still needs work for IS2 because of auth...
+#     # my_quest.download_all()
+
+########## ALL DATASET METHODS TESTS ##########
+
+# is successful execution enough here?
+# each of the query functions should be tested in their respective modules
+def test_search_all(quest_instance):
+    # Search and test all datasets
+    quest_instance.search_all()
+
+
+def test_download_all():
+    # this will require auth in some cases...
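+    # One possible future shape for this test -- a hypothetical sketch only;
+    # the environment-variable gate and tmp_path usage are assumptions, not
+    # settled icepyx conventions:
+    #
+    #     import os
+    #
+    #     @pytest.mark.skipif(
+    #         "EARTHDATA_USERNAME" not in os.environ,
+    #         reason="downloading ICESat-2 granules requires an Earthdata login",
+    #     )
+    #     def test_download_all_is2(quest_instance, tmp_path):
+    #         quest_instance.add_icesat2(product="ATL06")
+    #         quest_instance.search_all()
+    #         quest_instance.download_all(path=str(tmp_path))
+    #         assert any(tmp_path.iterdir())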
+    pass

From d03f9fbbd06d8342c81f58af3c97540bbaaebaa0 Mon Sep 17 00:00:00 2001
From: Jessica Scheick
Date: Wed, 18 Oct 2023 13:05:10 -0400
Subject: [PATCH 02/14] temporarily disable OpenAltimetry API tests (#459)

* add OA API warning
* comment out tests that use OA API

---------

Co-authored-by: GitHub Action
---
 .../documentation/classes_dev_uml.svg         | 300 +++++++++---------
 .../documentation/classes_user_uml.svg        | 206 ++++++------
 icepyx/core/visualization.py                  |   9 +-
 icepyx/tests/test_visualization.py            |   3 +-
 4 files changed, 263 insertions(+), 255 deletions(-)

diff --git a/doc/source/user_guide/documentation/classes_dev_uml.svg b/doc/source/user_guide/documentation/classes_dev_uml.svg
index fd5033938..34e13b41c 100644
--- a/doc/source/user_guide/documentation/classes_dev_uml.svg
+++ b/doc/source/user_guide/documentation/classes_dev_uml.svg
[regenerated developer UML class diagram; the SVG markup is garbled in this copy and omitted. The substantive change is that the dates, end_time, spatial, spatial_extent, start_time, and temporal properties move from Query to GenQuery.]

diff --git a/doc/source/user_guide/documentation/classes_user_uml.svg b/doc/source/user_guide/documentation/classes_user_uml.svg
index 1c9184379..640f76815 100644
--- a/doc/source/user_guide/documentation/classes_user_uml.svg
+++ b/doc/source/user_guide/documentation/classes_user_uml.svg
[regenerated user UML class diagram; SVG markup garbled in this copy and omitted. It reflects the same property moves from Query to GenQuery.]

diff --git a/icepyx/core/visualization.py b/icepyx/core/visualization.py
index c6bef2333..a2b8fe5dc 100644
--- a/icepyx/core/visualization.py
+++ b/icepyx/core/visualization.py
@@ -4,6 +4,7 @@
 import concurrent.futures
 import datetime
 import re
+import warnings
 
 import backoff
 import dask.array as da
@@ -332,7 +333,13 @@ def request_OA_data(self, paras) -> da.array:
         A dask array containing the ICESat-2 elevation data.
""" - base_url = "https://openaltimetry.org/data/api/icesat2/level3a" + warnings.warn( + "NOTICE: visualizations requiring the OpenAltimetry API are currently (October 2023) ", + "unavailable while hosting of OpenAltimetry transitions from UCSD to NSIDC.", + "A ticket has been issued to restore programmatic API access.", + ) + + base_url = "http://openaltimetry.earthdatacloud.nasa.gov/data/api/icesat2" trackId, Date, cycle, bbox, product = paras # Generate API diff --git a/icepyx/tests/test_visualization.py b/icepyx/tests/test_visualization.py index 8056a453f..0a1f2fa43 100644 --- a/icepyx/tests/test_visualization.py +++ b/icepyx/tests/test_visualization.py @@ -70,7 +70,7 @@ def test_gran_paras(filename, expect): # 2023-01-27: for the commented test below, r (in visualization line 444) is returning None even though I can see OA data there via a browser - +""" @pytest.mark.parametrize( "product, date_range, bbox, expect", [ @@ -112,3 +112,4 @@ def test_visualization_orbits(product, bbox, cycles, tracks, expect): data_size = region_viz.parallel_request_OA().size assert data_size == expect +""" From ee8b79fda74b4c6d59683e3124092bfa5afa2e6f Mon Sep 17 00:00:00 2001 From: Jessica Scheick Date: Wed, 18 Oct 2023 13:23:05 -0400 Subject: [PATCH 03/14] fix spot number calculation (#458) --------- Co-authored-by: GitHub Action --- icepyx/core/is2ref.py | 23 ++++++++++++++--------- icepyx/core/visualization.py | 6 +++--- icepyx/tests/test_is2ref.py | 16 ++++++++-------- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/icepyx/core/is2ref.py b/icepyx/core/is2ref.py index 52cf0e3a1..a3a0311bb 100644 --- a/icepyx/core/is2ref.py +++ b/icepyx/core/is2ref.py @@ -265,8 +265,11 @@ def _default_varlists(product): return common_list -# dev goal: check and test this function def gt2spot(gt, sc_orient): + warnings.warn( + "icepyx versions 0.8.0 and earlier used an incorrect spot number calculation." + "As a result, computations depending on spot number may be incorrect and should be redone." 
+ ) assert gt in [ "gt1l", @@ -280,12 +283,13 @@ def gt2spot(gt, sc_orient): gr_num = np.uint8(gt[2]) gr_lr = gt[3] + # spacecraft oriented forward if sc_orient == 1: if gr_num == 1: if gr_lr == "l": - spot = 2 + spot = 6 elif gr_lr == "r": - spot = 1 + spot = 5 elif gr_num == 2: if gr_lr == "l": spot = 4 @@ -293,16 +297,17 @@ def gt2spot(gt, sc_orient): spot = 3 elif gr_num == 3: if gr_lr == "l": - spot = 6 + spot = 2 elif gr_lr == "r": - spot = 5 + spot = 1 + # spacecraft oriented backward elif sc_orient == 0: if gr_num == 1: if gr_lr == "l": - spot = 5 + spot = 1 elif gr_lr == "r": - spot = 6 + spot = 2 elif gr_num == 2: if gr_lr == "l": spot = 3 @@ -310,9 +315,9 @@ def gt2spot(gt, sc_orient): spot = 4 elif gr_num == 3: if gr_lr == "l": - spot = 1 + spot = 5 elif gr_lr == "r": - spot = 2 + spot = 6 if "spot" not in locals(): raise ValueError("Could not compute the spot number.") diff --git a/icepyx/core/visualization.py b/icepyx/core/visualization.py index a2b8fe5dc..32c81e3e7 100644 --- a/icepyx/core/visualization.py +++ b/icepyx/core/visualization.py @@ -334,9 +334,9 @@ def request_OA_data(self, paras) -> da.array: """ warnings.warn( - "NOTICE: visualizations requiring the OpenAltimetry API are currently (October 2023) ", - "unavailable while hosting of OpenAltimetry transitions from UCSD to NSIDC.", - "A ticket has been issued to restore programmatic API access.", + "NOTICE: visualizations requiring the OpenAltimetry API are currently (October 2023) " + "unavailable while hosting of OpenAltimetry transitions from UCSD to NSIDC." + "A ticket has been issued to restore programmatic API access." ) base_url = "http://openaltimetry.earthdatacloud.nasa.gov/data/api/icesat2" diff --git a/icepyx/tests/test_is2ref.py b/icepyx/tests/test_is2ref.py index 8d50568fe..b22709c98 100644 --- a/icepyx/tests/test_is2ref.py +++ b/icepyx/tests/test_is2ref.py @@ -556,12 +556,12 @@ def test_unsupported_default_varlist(): def test_gt2spot_sc_orient_1(): # gt1l obs = is2ref.gt2spot("gt1l", 1) - expected = 2 + expected = 6 assert obs == expected # gt1r obs = is2ref.gt2spot("gt1r", 1) - expected = 1 + expected = 5 assert obs == expected # gt2l @@ -576,24 +576,24 @@ def test_gt2spot_sc_orient_1(): # gt3l obs = is2ref.gt2spot("gt3l", 1) - expected = 6 + expected = 2 assert obs == expected # gt3r obs = is2ref.gt2spot("gt3r", 1) - expected = 5 + expected = 1 assert obs == expected def test_gt2spot_sc_orient_0(): # gt1l obs = is2ref.gt2spot("gt1l", 0) - expected = 5 + expected = 1 assert obs == expected # gt1r obs = is2ref.gt2spot("gt1r", 0) - expected = 6 + expected = 2 assert obs == expected # gt2l @@ -608,10 +608,10 @@ def test_gt2spot_sc_orient_0(): # gt3l obs = is2ref.gt2spot("gt3l", 0) - expected = 1 + expected = 5 assert obs == expected # gt3r obs = is2ref.gt2spot("gt3r", 0) - expected = 2 + expected = 6 assert obs == expected From a1a723dac6a5c1c4b531c0079440705fb7d27052 Mon Sep 17 00:00:00 2001 From: Whyjay Zheng Date: Thu, 19 Oct 2023 01:39:02 +0800 Subject: [PATCH 04/14] Fix a broken link in IS2_data_access.ipynb (#456) --- doc/source/example_notebooks/IS2_data_access.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/example_notebooks/IS2_data_access.ipynb b/doc/source/example_notebooks/IS2_data_access.ipynb index d9d50cdc0..0b4a12244 100644 --- a/doc/source/example_notebooks/IS2_data_access.ipynb +++ b/doc/source/example_notebooks/IS2_data_access.ipynb @@ -79,7 +79,7 @@ "\n", "There are three required inputs, depending on how you want to search for data. 
Two are required in all cases:\n", "- `short_name` = the data product of interest, known as its \"short name\".\n", - "See https://nsidc.org/data/icesat-2/data-sets for a list of the available data products.\n", + "See https://nsidc.org/data/icesat-2/products for a list of the available data products.\n", "- `spatial extent` = a region of interest to search within. This can be entered as a bounding box, polygon vertex coordinate pairs, or a polygon geospatial file (currently shp, kml, and gpkg are supported).\n", " - bounding box: Given in decimal degrees for the lower left longitude, lower left latitude, upper right longitude, and upper right latitude\n", " - polygon vertices: Given as longitude, latitude coordinate pairs of decimal degrees with the last entry a repeat of the first.\n", From d86cc9e23e61b6328d603e351656396e8fd33697 Mon Sep 17 00:00:00 2001 From: Rachel Wegener <35503632+rwegener2@users.noreply.github.com> Date: Wed, 18 Oct 2023 14:02:15 -0400 Subject: [PATCH 05/14] update Read input arguments (#444) * add filelist and product properties to Read object * deprecate filename_pattern and product class Read inputs * transition to data_source input as a string (including glob string) or list * update tutorial with changes and user guidance for using glob --------- Co-authored-by: Jessica Scheick --- .../example_notebooks/IS2_data_read-in.ipynb | 182 +++++++++++----- .../documentation/classes_dev_uml.svg | 122 +++++------ .../documentation/classes_user_uml.svg | 21 +- doc/source/user_guide/documentation/read.rst | 2 + icepyx/core/is2ref.py | 5 +- icepyx/core/read.py | 206 +++++++++++++----- icepyx/tests/test_is2ref.py | 4 +- 7 files changed, 356 insertions(+), 186 deletions(-) diff --git a/doc/source/example_notebooks/IS2_data_read-in.ipynb b/doc/source/example_notebooks/IS2_data_read-in.ipynb index 115c63044..9bbac368b 100644 --- a/doc/source/example_notebooks/IS2_data_read-in.ipynb +++ b/doc/source/example_notebooks/IS2_data_read-in.ipynb @@ -63,9 +63,8 @@ "metadata": {}, "outputs": [], "source": [ - "path_root = '/full/path/to/your/data/'\n", - "pattern = \"processed_ATL{product:2}_{datetime:%Y%m%d%H%M%S}_{rgt:4}{cycle:2}{orbitsegment:2}_{version:3}_{revision:2}.h5\"\n", - "reader = ipx.Read(path_root, \"ATL06\", pattern) # or ipx.Read(filepath, \"ATLXX\") if your filenames match the default pattern" + "path_root = '/full/path/to/your/ATL06_data/'\n", + "reader = ipx.Read(path_root)" ] }, { @@ -111,10 +110,9 @@ "\n", "Reading in ICESat-2 data with icepyx happens in a few simple steps:\n", "1. Let icepyx know where to find your data (this might be local files or urls to data in cloud storage)\n", - "2. Tell icepyx how to interpret the filename format\n", - "3. Create an icepyx `Read` object\n", - "4. Make a list of the variables you want to read in (does not apply for gridded products)\n", - "5. Load your data into memory (or read it in lazily, if you're using Dask)\n", + "2. Create an icepyx `Read` object\n", + "3. Make a list of the variables you want to read in (does not apply for gridded products)\n", + "4. Load your data into memory (or read it in lazily, if you're using Dask)\n", "\n", "We go through each of these steps in more detail in this notebook." ] @@ -168,21 +166,18 @@ { "cell_type": "markdown", "id": "e8da42c1", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "### Step 1: Set data source path\n", "\n", "Provide a full path to the data to be read in (i.e. 
opened).\n", "Currently accepted inputs are:\n", - "* a directory\n", - "* a single file\n", - "\n", - "All files to be read in *must* have a consistent filename pattern.\n", - "If a directory is supplied as the data source, all files in any subdirectories that match the filename pattern will be included.\n", - "\n", - "S3 bucket data access is currently under development, and requires you are registered with NSIDC as a beta tester for cloud-based ICESat-2 data.\n", - "icepyx is working to ensure a smooth transition to working with remote files.\n", - "We'd love your help exploring and testing these features as they become available!" + "* a string path to directory - all files from the directory will be opened\n", + "* a string path to single file - one file will be opened\n", + "* a list of filepaths - all files in the list will be opened\n", + "* a glob string (see [glob](https://docs.python.org/3/library/glob.html)) - any files matching the glob pattern will be opened" ] }, { @@ -208,86 +203,147 @@ { "cell_type": "code", "execution_count": null, - "id": "e683ebf7", + "id": "fac636c2-e0eb-4e08-adaa-8f47623e46a1", "metadata": {}, "outputs": [], "source": [ - "# urlpath = 's3://nsidc-cumulus-prod-protected/ATLAS/ATL03/004/2019/11/30/ATL03_20191130221008_09930503_004_01.h5'" + "# list_of_files = ['/my/data/ATL06/processed_ATL06_20190226005526_09100205_006_02.h5', \n", + "# '/my/other/data/ATL06/processed_ATL06_20191202102922_10160505_006_01.h5']" ] }, { "cell_type": "markdown", - "id": "92743496", + "id": "ba3ebeb0-3091-4712-b0f7-559ddb95ca5a", "metadata": { "user_expressions": [] }, "source": [ - "### Step 2: Create a filename pattern for your data files\n", + "#### Glob Strings\n", + "\n", + "[glob](https://docs.python.org/3/library/glob.html) is a Python library which allows users to list files in their file systems whose paths match a given pattern. Icepyx uses the glob library to give users greater flexibility over their input file lists.\n", + "\n", + "glob works using `*` and `?` as wildcard characters, where `*` matches any number of characters and `?` matches a single character. For example:\n", "\n", - "Files provided by NSIDC typically match the format `\"ATL{product:2}_{datetime:%Y%m%d%H%M%S}_{rgt:4}{cycle:2}{orbitsegment:2}_{version:3}_{revision:2}.h5\"` where the parameters in curly brackets indicate a parameter name (left of the colon) and character length or format (right of the colon).\n", - "Some of this information is used during data opening to help correctly read and label the data within the data structure, particularly when multiple files are opened simultaneously.\n", + "* `/this/path/*.h5`: refers to all `.h5` files in the `/this/path` folder (Example matches: \"/this/path/processed_ATL03_20191130221008_09930503_006_01.h5\" or \"/this/path/myfavoriteicsat-2file.h5\")\n", + "* `/this/path/*ATL07*.h5`: refers to all `.h5` files in the `/this/path` folder that have ATL07 in the filename. 
(Example matches: \"/this/path/ATL07-02_20221012220720_03391701_005_01.h5\" or \"/this/path/processed_ATL07.h5\")\n", + "* `/this/path/ATL??/*.h5`: refers to all `.h5` files that are in a subfolder of `/this/path` and a subdirectory of `ATL` followed by any 2 characters (Example matches: \"/this/path/ATL03/processed_ATL03_20191130221008_09930503_006_01.h5\", \"/this/path/ATL06/myfile.h5\")\n", "\n", - "By default, icepyx will assume your filenames follow the default format.\n", - "However, you can easily read in other ICESat-2 data files by supplying your own filename pattern.\n", - "For instance, `pattern=\"ATL{product:2}-{datetime:%Y%m%d%H%M%S}-Sample.h5\"`. A few example patterns are provided below." + "See the glob documentation or other online explainer tutorials for more in depth explanation, or advanced glob paths such as character classes and ranges." ] }, { - "cell_type": "code", - "execution_count": null, - "id": "7318abd0", - "metadata": {}, - "outputs": [], + "cell_type": "markdown", + "id": "20286c76-5632-4420-b2c9-a5a6b1952672", + "metadata": { + "user_expressions": [] + }, + "source": [ + "#### Recursive Directory Search" + ] + }, + { + "cell_type": "markdown", + "id": "632bd1ce-2397-4707-a63f-9d5d2fc02fbc", + "metadata": { + "user_expressions": [] + }, + "source": [ + "glob will not by default search all of the subdirectories for matching filepaths, but it has the ability to do so.\n", + "\n", + "If you would like to search recursively, you can achieve this by either:\n", + "1. passing the `recursive` argument into `glob_kwargs` and including `\\**\\` in your filepath\n", + "2. using glob directly to create a list of filepaths\n", + "\n", + "Each of these two methods are shown below." + ] + }, + { + "cell_type": "markdown", + "id": "da0cacd8-9ddc-4c31-86b6-167d850b989e", + "metadata": { + "user_expressions": [] + }, "source": [ - "# pattern = 'ATL06-{datetime:%Y%m%d%H%M%S}-Sample.h5'\n", - "# pattern = 'ATL{product:2}-{datetime:%Y%m%d%H%M%S}-Sample.h5'" + "Method 1: passing the `recursive` argument into `glob_kwargs`" ] }, { "cell_type": "code", "execution_count": null, - "id": "f43e8664", + "id": "e276b876-9ec7-4991-8520-05c97824b896", "metadata": {}, "outputs": [], "source": [ - "# pattern = \"ATL{product:2}_{datetime:%Y%m%d%H%M%S}_{rgt:4}{cycle:2}{orbitsegment:2}_{version:3}_{revision:2}.h5\"" + "ipx.Read('/path/to/**/folder', glob_kwargs={'recursive': True})" + ] + }, + { + "cell_type": "markdown", + "id": "f5a1e85e-fc4a-405f-9710-0cb61b827f2c", + "metadata": { + "user_expressions": [] + }, + "source": [ + "You can use `glob_kwargs` for any additional argument to Python's builtin `glob.glob` that you would like to pass in via icepyx." 
+ ] + }, + { + "cell_type": "markdown", + "id": "76de9539-710c-49f6-9e9e-238849382c33", + "metadata": { + "user_expressions": [] + }, + "source": [ + "Method 2: using glob directly to create a list of filepaths" ] }, { "cell_type": "code", "execution_count": null, - "id": "992a77fb", + "id": "be79b0dd-efcf-4d50-bdb0-8e3ae8e8e38c", "metadata": {}, "outputs": [], "source": [ - "# grid_pattern = \"ATL{product:2}_GL_0311_{res:3}m_{version:3}_{revision:2}.nc\"" + "import glob" ] }, { "cell_type": "code", "execution_count": null, - "id": "6aec1a70", - "metadata": {}, + "id": "5d088571-496d-479a-9fb7-833ed7e98676", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "pattern = \"processed_ATL{product:2}_{datetime:%Y%m%d%H%M%S}_{rgt:4}{cycle:2}{orbitsegment:2}_{version:3}_{revision:2}.h5\"" + "list_of_files = glob.glob('/path/to/**/folder', recursive=True)\n", + "ipx.Read(list_of_files)" ] }, { "cell_type": "markdown", - "id": "4275b04c", + "id": "08df2874-7c54-4670-8f37-9135ea296ff5", "metadata": { "user_expressions": [] }, "source": [ - "### Step 3: Create an icepyx read object\n", + "```{admonition} Read Module Update\n", + "Previously, icepyx required two additional conditions: 1) a `product` argument and 2) that your files either matched the default `filename_pattern` or that the user provided their own `filename_pattern`. These two requirements have been removed. `product` is now read directly from the file metadata (the root group's `short_name` attribute). Flexibility to specify multiple files via the `filename_pattern` has been replaced with the [glob string](https://docs.python.org/3/library/glob.html) feature, and by allowing a list of filepaths as an argument.\n", "\n", - "The `Read` object has two required inputs:\n", - "- `path` = a string with the full file path or full directory path to your hdf5 (.h5) format files.\n", - "- `product` = the data product you're working with, also known as the \"short name\".\n", + "The `product` and `filename_pattern` arguments have been maintained for backwards compatibility, but will be fully removed in icepyx version 1.0.0.\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "4275b04c", + "metadata": { + "user_expressions": [] + }, + "source": [ + "### Step 2: Create an icepyx read object\n", "\n", - "The `Read` object also accepts the optional keyword input:\n", - "- `pattern` = a formatted string indicating the filename pattern required for Intake's path_as_pattern argument." + "Using the `data_source` described in Step 1, we can create our Read object." ] }, { @@ -299,7 +355,17 @@ }, "outputs": [], "source": [ - "reader = ipx.Read(data_source=path_root, product=\"ATL06\", filename_pattern=pattern) # or ipx.Read(filepath, \"ATLXX\") if your filenames match the default pattern" + "reader = ipx.Read(data_source=path_root)" + ] + }, + { + "cell_type": "markdown", + "id": "7b2acfdb-75eb-4c64-b583-2ab19326aaee", + "metadata": { + "user_expressions": [] + }, + "source": [ + "The Read object now contains the list of matching files that will eventually be loaded into Python. You can inspect its properties, such as the files that were located or the identified product, directly on the Read object." 
] }, { @@ -309,7 +375,17 @@ "metadata": {}, "outputs": [], "source": [ - "reader._filelist" + "reader.filelist" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7455ee3f-f9ab-486e-b4c7-2fa2314d4084", + "metadata": {}, + "outputs": [], + "source": [ + "reader.product" ] }, { @@ -319,7 +395,7 @@ "user_expressions": [] }, "source": [ - "### Step 4: Specify variables to be read in\n", + "### Step 3: Specify variables to be read in\n", "\n", "To load your data into memory or prepare it for analysis, icepyx needs to know which variables you'd like to read in.\n", "If you've used icepyx to download data from NSIDC with variable subsetting (which is the default), then you may already be familiar with the icepyx `Variables` module and how to create and modify lists of variables.\n", @@ -426,7 +502,7 @@ "user_expressions": [] }, "source": [ - "### Step 5: Loading your data\n", + "### Step 4: Loading your data\n", "\n", "Now that you've set up all the options, you're ready to read your ICESat-2 data into memory!" ] @@ -541,9 +617,9 @@ ], "metadata": { "kernelspec": { - "display_name": "general", + "display_name": "icepyx-dev", "language": "python", - "name": "general" + "name": "icepyx-dev" }, "language_info": { "codemirror_mode": { diff --git a/doc/source/user_guide/documentation/classes_dev_uml.svg b/doc/source/user_guide/documentation/classes_dev_uml.svg index 34e13b41c..0cd08c9e9 100644 --- a/doc/source/user_guide/documentation/classes_dev_uml.svg +++ b/doc/source/user_guide/documentation/classes_dev_uml.svg @@ -4,11 +4,11 @@ - + classes_dev_uml - + icepyx.core.auth.AuthenticationError @@ -139,38 +139,38 @@ icepyx.core.icesat2data.Icesat2Data - -Icesat2Data - - -__init__() + +Icesat2Data + + +__init__() icepyx.core.exceptions.NsidcQueryError - -NsidcQueryError - -errmsg -msgtxt : str - -__init__(errmsg, msgtxt) -__str__() + +NsidcQueryError + +errmsg +msgtxt : str + +__init__(errmsg, msgtxt) +__str__() icepyx.core.exceptions.QueryError - -QueryError - - - + +QueryError + + + icepyx.core.exceptions.NsidcQueryError->icepyx.core.exceptions.QueryError - - + + @@ -235,24 +235,24 @@ icepyx.core.read.Read - -Read - -_filelist : NoneType, list -_out_obj : Dataset -_pattern : str -_prod : str -_read_vars -_source_type : str -data_source -vars - -__init__(data_source, product, filename_pattern, catalog, out_obj_type) -_add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict) -_build_dataset_template(file) -_build_single_file_dataset(file, groups_list) -_check_source_for_pattern(source, filename_pattern) -_combine_nested_vars(is2ds, ds, grp_path, wanted_dict) + +Read + +_filelist : NoneType, list +_out_obj : Dataset +_product : NoneType, str +_read_vars +filelist +product +vars + +__init__(data_source, product, filename_pattern, catalog, glob_kwargs, out_obj_type) +_add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict) +_build_dataset_template(file) +_build_single_file_dataset(file, groups_list) +_check_source_for_pattern(source, filename_pattern) +_combine_nested_vars(is2ds, ds, grp_path, wanted_dict) +_extract_product(filepath) _read_single_grp(file, grp_path) load() @@ -366,30 +366,30 @@ icepyx.core.variables.Variables->icepyx.core.read.Read - - -_read_vars + + +_read_vars icepyx.core.visualization.Visualize - -Visualize - -bbox : list -cycles : NoneType -date_range : NoneType -product : NoneType, str -tracks : NoneType - -__init__(query_obj, product, spatial_extent, date_range, cycles, tracks) -generate_OA_parameters(): list -grid_bbox(binsize): list 
-make_request(base_url, payload) -parallel_request_OA(): da.array -query_icesat2_filelist(): tuple -request_OA_data(paras): da.array -viz_elevation(): (hv.DynamicMap, hv.Layout) + +Visualize + +bbox : list +cycles : NoneType +date_range : NoneType +product : NoneType, str +tracks : NoneType + +__init__(query_obj, product, spatial_extent, date_range, cycles, tracks) +generate_OA_parameters(): list +grid_bbox(binsize): list +make_request(base_url, payload) +parallel_request_OA(): da.array +query_icesat2_filelist(): tuple +request_OA_data(paras): da.array +viz_elevation(): (hv.DynamicMap, hv.Layout) diff --git a/doc/source/user_guide/documentation/classes_user_uml.svg b/doc/source/user_guide/documentation/classes_user_uml.svg index 640f76815..a9c116469 100644 --- a/doc/source/user_guide/documentation/classes_user_uml.svg +++ b/doc/source/user_guide/documentation/classes_user_uml.svg @@ -201,13 +201,14 @@ icepyx.core.read.Read - -Read - -data_source -vars - -load() + +Read + +filelist +product +vars + +load() @@ -300,9 +301,9 @@ icepyx.core.variables.Variables->icepyx.core.read.Read - - -_read_vars + + +_read_vars diff --git a/doc/source/user_guide/documentation/read.rst b/doc/source/user_guide/documentation/read.rst index a5beedf4e..68da03b1d 100644 --- a/doc/source/user_guide/documentation/read.rst +++ b/doc/source/user_guide/documentation/read.rst @@ -19,6 +19,8 @@ Attributes .. autosummary:: :toctree: ../../_icepyx/ + Read.filelist + Read.product Read.vars diff --git a/icepyx/core/is2ref.py b/icepyx/core/is2ref.py index a3a0311bb..5faaef110 100644 --- a/icepyx/core/is2ref.py +++ b/icepyx/core/is2ref.py @@ -15,6 +15,7 @@ def _validate_product(product): """ Confirm a valid ICESat-2 product was specified """ + error_msg = "A valid product string was not provided. Check user input, if given, or file metadata." if isinstance(product, str): product = str.upper(product) assert product in [ @@ -40,9 +41,9 @@ def _validate_product(product): "ATL20", "ATL21", "ATL23", - ], "Please enter a valid product" + ], error_msg else: - raise TypeError("Please enter a product string") + raise TypeError(error_msg) return product diff --git a/icepyx/core/read.py b/icepyx/core/read.py index a7ee15db7..a85ee659b 100644 --- a/icepyx/core/read.py +++ b/icepyx/core/read.py @@ -1,7 +1,9 @@ import fnmatch +import glob import os import warnings +import h5py import numpy as np import xarray as xr @@ -10,8 +12,6 @@ from icepyx.core.variables import Variables as Variables from icepyx.core.variables import list_of_dict_vals -# from icepyx.core.query import Query - def _make_np_datetime(df, keyword): """ @@ -266,24 +266,28 @@ class Read: Parameters ---------- - data_source : string - A string with a full file path or full directory path to ICESat-2 hdf5 (.h5) format files. - Files within a directory must have a consistent filename pattern that includes the "ATL??" data product name. - Files must all be within a single directory. + data_source : string, List + A string or list which specifies the files to be read. The string can be either: 1) the path of a single file 2) the path to a directory or 3) a [glob string](https://docs.python.org/3/library/glob.html). + The List must be a list of strings, each of which is the path of a single file. product : string ICESat-2 data product ID, also known as "short name" (e.g. ATL03). Available data products can be found at: https://nsidc.org/data/icesat-2/data-sets + **Deprecation warning:** This argument is no longer required and will be deprecated in version 1.0.0. 
The dataset product is read from the file metadata. - filename_pattern : string, default 'ATL{product:2}_{datetime:%Y%m%d%H%M%S}_{rgt:4}{cycle:2}{orbitsegment:2}_{version:3}_{revision:2}.h5' - String that shows the filename pattern as required for Intake's path_as_pattern argument. + filename_pattern : string, default None + String that shows the filename pattern as previously required for Intake's path_as_pattern argument. The default describes files downloaded directly from NSIDC (subsetted and non-subsetted) for most products (e.g. ATL06). The ATL11 filename pattern from NSIDC is: 'ATL{product:2}_{rgt:4}{orbitsegment:2}_{cycles:4}_{version:3}_{revision:2}.h5'. - + **Deprecation warning:** This argument is no longer required and will be deprecated in version 1.0.0. + catalog : string, default None Full path to an Intake catalog for reading in data. If you still need to create a catalog, leave as default. - **Deprecation warning:** This argument has been depreciated. Please use the data_source argument to pass in valid data. + **Deprecation warning:** This argument has been deprecated. Please use the data_source argument to pass in valid data. + + glob_kwargs : dict, default {} + Additional arguments to be passed into the [glob.glob()](https://docs.python.org/3/library/glob.html#glob.glob)function out_obj_type : object, default xarray.Dataset The desired format for the data to be read in. @@ -296,6 +300,21 @@ class Read: Examples -------- + Reading a single file + >>> ipx.Read('/path/to/data/processed_ATL06_20190226005526_09100205_006_02.h5') # doctest: +SKIP + + Reading all files in a directory + >>> ipx.Read('/path/to/data/') # doctest: +SKIP + + Reading files that match a particular pattern (here, all .h5 files that start with `processed_ATL06_`). + >>> ipx.Read('/path/to/data/processed_ATL06_*.h5') # doctest: +SKIP + + Reading a specific list of files + >>> list_of_files = [ + ... '/path/to/data/processed_ATL06_20190226005526_09100205_006_02.h5', + ... '/path/to/more/data/processed_ATL06_20191202102922_10160505_006_01.h5', + ... ] + >>> ipx.Read(list_of_files) # doctest: +SKIP """ @@ -306,55 +325,106 @@ def __init__( self, data_source=None, product=None, - filename_pattern="ATL{product:2}_{datetime:%Y%m%d%H%M%S}_{rgt:4}{cycle:2}{orbitsegment:2}_{version:3}_{revision:2}.h5", + filename_pattern=None, catalog=None, + glob_kwargs={}, out_obj_type=None, # xr.Dataset, ): - # Raise error for depreciated argument + # Raise error for deprecated argument if catalog: raise DeprecationError( - 'The `catalog` argument has been deprecated and intake is no longer supported. ' - 'Please use the `data_source` argument to specify your dataset instead.' + "The `catalog` argument has been deprecated and intake is no longer supported. " + "Please use the `data_source` argument to specify your dataset instead." ) if data_source is None: - raise ValueError("Please provide a data source.") - else: - self._source_type = _check_datasource(data_source) - self.data_source = data_source + raise ValueError("data_source is a required arguemnt") - if product is None: - raise ValueError( - "Please provide the ICESat-2 data product of your file(s)." + # Raise warnings for deprecated arguments + if filename_pattern: + warnings.warn( + "The `filename_pattern` argument is deprecated. 
Instead please provide a " + "string, list, or glob string to the `data_source` argument.", + stacklevel=2, ) - else: - self._prod = is2ref._validate_product(product) - pattern_ck, filelist = Read._check_source_for_pattern( - data_source, filename_pattern - ) - assert pattern_ck - # Note: need to check if this works for subset and non-subset NSIDC files (processed_ prepends the former) - self._pattern = filename_pattern - - # this is a first pass at getting rid of mixed product types and warning the user. - # it takes an approach assuming the product name is in the filename, but needs reworking if we let multiple products be loaded - # one way to handle this would be bring in the product info during the loading step and fill in product there instead of requiring it from the user - filtered_filelist = [file for file in filelist if self._prod in file] - if len(filtered_filelist) == 0: + + if product: + product = is2ref._validate_product(product) warnings.warn( - "Your filenames do not contain a product identifier (e.g. ATL06). " - "You will likely need to manually merge your dataframes." + "The `product` argument is no longer required. If the `data_source` argument given " + "contains files with multiple products the `product` argument will be used " + "to filter that list. In all other cases the product argument is ignored. " + "The recommended approach is to not include a `product` argument and instead " + "provide a `data_source` with files of only a single product type`.", + stacklevel=2, ) + + # Create the filelist from the `data_source` argument + if filename_pattern: + # maintained for backward compatibility + pattern_ck, filelist = Read._check_source_for_pattern( + data_source, filename_pattern + ) + assert pattern_ck self._filelist = filelist - elif len(filtered_filelist) < len(filelist): - warnings.warn( - "Some files matching your filename pattern were removed as they were not the specified product." + elif isinstance(data_source, list): + self._filelist = data_source + elif os.path.isdir(data_source): + data_source = os.path.join(data_source, "*") + self._filelist = glob.glob(data_source, **glob_kwargs) + else: + self._filelist = glob.glob(data_source, **glob_kwargs) + # Remove any directories from the list + self._filelist = [f for f in self._filelist if not os.path.isdir(f)] + + # Create a dictionary of the products as read from the metadata + product_dict = {} + for file_ in self._filelist: + product_dict[file_] = self._extract_product(file_) + + # Raise warnings or errors for multiple products or products not matching the user-specified product + all_products = list(set(product_dict.values())) + if len(all_products) > 1: + if product: + warnings.warn( + f"Multiple products found in list of files: {product_dict}. Files that " + "do not match the user specified product will be removed from processing.\n" + "Filtering files using a `product` argument is deprecated. Please use the " + "`data_source` argument to specify a list of files with the same product.", + stacklevel=2, + ) + self._filelist = [] + for key, value in product_dict.items(): + if value == product: + self._filelist.append(key) + if len(self._filelist) == 0: + raise TypeError( + "No files found in the file list matching the user-specified " + "product type" + ) + # Use the cleaned filelist to assign a product + self._product = product + else: + raise TypeError( + f"Multiple product types were found in the file list: {product_dict}." 
+ "Please provide a valid `data_source` parameter indicating files of a single " + "product" + ) + elif len(all_products) == 0: + raise TypeError( + "No files found matching the specified `data_source`. Check your glob " + "string or file list." ) - self._filelist = filtered_filelist else: - self._filelist = filelist - - # after validation, use the notebook code and code outline to start implementing the rest of the class + # Assign the identified product to the property + self._product = all_products[0] + # Raise a warning if the metadata-located product differs from the user-specified product + if product and self._product != product: + warnings.warn( + f"User specified product {product} does not match the product from the file" + " metadata {self._product}", + stacklevel=2, + ) if out_obj_type is not None: print( @@ -387,14 +457,43 @@ def vars(self): if not hasattr(self, "_read_vars"): self._read_vars = Variables( - "file", path=self._filelist[0], product=self._prod + "file", path=self.filelist[0], product=self.product ) return self._read_vars + @property + def filelist(self): + """ + Return the list of files represented by this Read object. + """ + return self._filelist + + @property + def product(self): + """ + Return the product associated with the Read object. + """ + return self._product + # ---------------------------------------------------------------------- # Methods + @staticmethod + def _extract_product(filepath): + """ + Read the product type from the metadata of the file. Return the product as a string. + """ + with h5py.File(filepath, "r") as f: + try: + product = f.attrs["short_name"].decode() + product = is2ref._validate_product(product) + except KeyError: + raise AttributeError( + f"Unable to extract the product name from file metadata." + ) + return product + @staticmethod def _check_source_for_pattern(source, filename_pattern): """ @@ -654,7 +753,7 @@ def load(self): # However, this led to errors when I tried to combine two identical datasets because the single dimension was equal. # In these situations, xarray recommends manually controlling the merge/concat process yourself. # While unlikely to be a broad issue, I've heard of multiple matching timestamps causing issues for combining multiple IS2 datasets. - for file in self._filelist: + for file in self.filelist: all_dss.append( self._build_single_file_dataset(file, groups_list) ) # wanted_groups, vgrp.keys())) @@ -689,7 +788,7 @@ def _build_dataset_template(self, file): gran_idx=[np.uint64(999999)], source_file=(["gran_idx"], [file]), ), - attrs=dict(data_product=self._prod), + attrs=dict(data_product=self.product), ) return is2ds @@ -737,20 +836,11 @@ def _build_single_file_dataset(self, file, groups_list): ------- Xarray Dataset """ - file_product = self._read_single_grp(file, "/").attrs["identifier_product_type"] - assert ( - file_product == self._prod - ), "Your product specification does not match the product specification within your files." - # I think the below method might NOT read the file into memory as the above might? 
- # import h5py - # with h5py.File(filepath,'r') as h5pt: - # prod_id = h5pt.attrs["identifier_product_type"] - # DEVNOTE: if and elif does not actually apply wanted variable list, and has not been tested for merging multiple files into one ds # if a gridded product # TODO: all products need to be tested, and quicklook products added or explicitly excluded # Level 3b, gridded (netcdf): ATL14, 15, 16, 17, 18, 19, 20, 21 - if self._prod in [ + if self.product in [ "ATL14", "ATL15", "ATL16", @@ -764,7 +854,7 @@ def _build_single_file_dataset(self, file, groups_list): is2ds = xr.open_dataset(file) # Level 3b, hdf5: ATL11 - elif self._prod in ["ATL11"]: + elif self.product in ["ATL11"]: is2ds = self._build_dataset_template(file) # returns the wanted groups as a single list of full group path strings diff --git a/icepyx/tests/test_is2ref.py b/icepyx/tests/test_is2ref.py index b22709c98..fb8d16cad 100644 --- a/icepyx/tests/test_is2ref.py +++ b/icepyx/tests/test_is2ref.py @@ -8,14 +8,14 @@ def test_num_product(): dsnum = 6 - ermsg = "Please enter a product string" + ermsg = "A valid product string was not provided. Check user input, if given, or file metadata." with pytest.raises(TypeError, match=ermsg): is2ref._validate_product(dsnum) def test_bad_product(): wrngds = "atl-6" - ermsg = "Please enter a valid product" + ermsg = "A valid product string was not provided. Check user input, if given, or file metadata." with pytest.raises(AssertionError, match=ermsg): is2ref._validate_product(wrngds) From aedbcce20d851209f3cf15bea6993efbcc2984fe Mon Sep 17 00:00:00 2001 From: Jessica Scheick Date: Thu, 19 Oct 2023 18:13:18 -0400 Subject: [PATCH 06/14] enable QUEST kwarg handling (#452) * add kwarg acceptance for data queries and download_all in quest * Add QUEST dataset page to RTD --------- Co-authored-by: zachghiaccio --- doc/source/index.rst | 1 + icepyx/quest/quest.py | 99 ++++++++++++++++++++++++++++---------- icepyx/tests/test_quest.py | 31 ++++++++++-- 3 files changed, 102 insertions(+), 29 deletions(-) diff --git a/doc/source/index.rst b/doc/source/index.rst index 719f528b2..586c8810f 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -146,6 +146,7 @@ ICESat-2 datasets to enable scientific discovery. contributing/contribution_guidelines contributing/how_to_contribute contributing/icepyx_internals + contributing/quest-available-datasets contributing/attribution_link contributing/development_plan contributing/release_guide diff --git a/icepyx/quest/quest.py b/icepyx/quest/quest.py index c54e49b73..fe3039a39 100644 --- a/icepyx/quest/quest.py +++ b/icepyx/quest/quest.py @@ -59,7 +59,7 @@ def __init__( date_range=None, start_time=None, end_time=None, - proj="Default", + proj="default", ): """ Tells QUEST to initialize data given the user input spatiotemporal data. @@ -94,9 +94,23 @@ def add_icesat2( tracks=None, files=None, **kwargs, - ): + ) -> None: """ Adds ICESat-2 datasets to QUEST structure. + + Parameters + ---------- + + For details on inputs, see the Query documentation. + + Returns + ------- + None + + See Also + -------- + icepyx.core.GenQuery + icepyx.core.Query """ query = Query( @@ -122,41 +136,76 @@ def add_icesat2( # ---------------------------------------------------------------------- # Methods (on all datasets) - # error handling? what happens when one of i fails... - def search_all(self): + # error handling? what happens when the user tries to re-query? + def search_all(self, **kwargs): """ Searches for requred dataset within platform (i.e. 
ICESat-2, Argo) of interest. + + Parameters + ---------- + **kwargs : default None + Optional passing of keyword arguments to supply additional search constraints per datasets. + Each key must match the dataset name (e.g. "icesat2", "argo") as in quest.datasets.keys(), + and the value is a dictionary of acceptable keyword arguments + and values allowable for the `search_data()` function for that dataset. + For instance: `icesat2 = {"IDs":True}, argo = {"presRange":"10,500"}`. """ print("\nSearching all datasets...") - for i in self.datasets.values(): + for k, v in self.datasets.items(): print() try: - # querying ICESat-2 data - if isinstance(i, Query): + if isinstance(v, Query): print("---ICESat-2---") - msg = i.avail_granules() + try: + msg = v.avail_granules(kwargs[k]) + except KeyError: + msg = v.avail_granules() print(msg) - else: # querying all other data sets - print(i) - i.search_data() + else: + print(k) + try: + v.search_data(kwargs[k]) + except KeyError: + v.search_data() except: - dataset_name = type(i).__name__ + dataset_name = type(v).__name__ print("Error querying data from {0}".format(dataset_name)) - # error handling? what happens when one of i fails... - def download_all(self, path=""): - ' ' 'Downloads requested dataset(s).' ' ' + # error handling? what happens if the user tries to re-download? + def download_all(self, path="", **kwargs): + """ + Downloads requested dataset(s). + + Parameters + ---------- + **kwargs : default None + Optional passing of keyword arguments to supply additional search constraints per datasets. + Each key must match the dataset name (e.g. "icesat2", "argo") as in quest.datasets.keys(), + and the value is a dictionary of acceptable keyword arguments + and values allowable for the `search_data()` function for that dataset. + For instance: `icesat2 = {"verbose":True}, argo = {"keep_existing":True}`. + """ print("\nDownloading all datasets...") - for i in self.datasets.values(): + for k, v in self.datasets.items(): print() - if isinstance(i, Query): - print("---ICESat-2---") - msg = i.download_granules(path) - print(msg) - else: - i.download() - print(i) - - # DEVNOTE: see colocated data branch and phyto team files for code that expands quest functionality + try: + + if isinstance(v, Query): + print("---ICESat-2---") + try: + msg = v.download_granules(path, kwargs[k]) + except KeyError: + msg = v.download_granules(path) + print(msg) + else: + print(k) + try: + msg = v.download(kwargs[k]) + except KeyError: + msg = v.download() + print(msg) + except: + dataset_name = type(v).__name__ + print("Error downloading data from {0}".format(dataset_name)) diff --git a/icepyx/tests/test_quest.py b/icepyx/tests/test_quest.py index 043ee159e..f50b1bea2 100644 --- a/icepyx/tests/test_quest.py +++ b/icepyx/tests/test_quest.py @@ -68,13 +68,36 @@ def test_add_is2(quest_instance): ########## ALL DATASET METHODS TESTS ########## -# is successful execution enough here? # each of the query functions should be tested in their respective modules def test_search_all(quest_instance): # Search and test all datasets quest_instance.search_all() -def test_download_all(): - # this will require auth in some cases... 
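(Editor's note: to make the kwarg forwarding above concrete, here is a hedged usage sketch based on the docstrings and the parametrized test values in this patch. Each keyword passed to `search_all` or `download_all` must match a key in `quest.datasets` (e.g. `icesat2`), and its value is a dict forwarded to that dataset's search or download call. The spatial extent, date range, and download path are placeholders.)

```python
# Usage sketch assuming the QUEST API in this patch; the spatial extent,
# date range, and download path are placeholder values.
import icepyx as ipx

quest = ipx.Quest(
    spatial_extent=[-55, 68, -48, 71],
    date_range=["2019-02-20", "2019-02-28"],
)
quest.add_icesat2(product="ATL06")

# Keys must match dataset names in quest.datasets; values are dicts of
# keyword arguments forwarded to each dataset's search function.
quest.search_all(icesat2={"IDs": True})

# download_all forwards per-dataset kwargs the same way (ICESat-2
# downloads additionally require Earthdata authentication).
quest.download_all(path="./data", icesat2={"verbose": True})
```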
- pass +@pytest.mark.parametrize( + "kwargs", + [ + {"icesat2": {"IDs": True}}, + # {"argo":{"presRange":"10,500"}}, + # {"icesat2":{"IDs":True}, "argo":{"presRange":"10,500"}} + ], +) +def test_search_all_kwargs(quest_instance, kwargs): + quest_instance.search_all(**kwargs) + + +# TESTS NOT IMPLEMENTED +# def test_download_all(): +# # this will require auth in some cases... +# pass + +# @pytest.mark.parametrize( +# "kwargs", +# [ +# {"icesat2": {"verbose":True}}, +# # {"argo":{"keep_existing":True}, +# # {"icesat2":{"verbose":True}, "argo":{"keep_existing":True} +# ], +# ) +# def test_download_all_kwargs(quest_instance, kwargs): +# pass From 73f929e8c1a50e43f6346aa69cfb607cd5d96e67 Mon Sep 17 00:00:00 2001 From: "allcontributors[bot]" <46447321+allcontributors[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 11:41:03 -0400 Subject: [PATCH 07/14] docs: add rwegener2 as a contributor for bug, code, and 6 more (#460) Co-authored-by: allcontributors[bot] <46447321+allcontributors[bot]@users.noreply.github.com> Co-authored-by: Jessica Scheick --- .all-contributorsrc | 19 ++++++++++++++++++- CONTRIBUTORS.rst | 11 ++++++----- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/.all-contributorsrc b/.all-contributorsrc index 8f9a076e4..3b321715a 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -422,6 +422,22 @@ "contributions": [ "review" ] + }, + { + "login": "rwegener2", + "name": "Rachel Wegener", + "avatar_url": "https://avatars.githubusercontent.com/u/35503632?v=4", + "profile": "https://rwegener2.github.io/", + "contributions": [ + "bug", + "code", + "doc", + "ideas", + "maintenance", + "review", + "test", + "tutorial" + ] } ], "contributorsPerLine": 7, @@ -430,5 +446,6 @@ "repoType": "github", "repoHost": "https://github.com", "skipCi": true, - "commitConvention": "angular" + "commitConvention": "angular", + "commitType": "docs" } diff --git a/CONTRIBUTORS.rst b/CONTRIBUTORS.rst index c6b0c84f5..be362bb28 100644 --- a/CONTRIBUTORS.rst +++ b/CONTRIBUTORS.rst @@ -31,41 +31,42 @@ Thanks goes to these wonderful people (`emoji key Nicole Abib
Nicole Abib
💻 🤔 + Rachel Wegener
Rachel Wegener

🐛 💻 📖 🤔 🚧 👀 ⚠️ Raphael Hagen
Raphael Hagen

📖 🎨 💻 🚇 👀 Romina Piunno
Romina Piunno

💻 🤔 🧑‍🏫 👀 Sarah Hall
Sarah Hall

🐛 💻 📖 🚧 ⚠️ Scott Henderson
Scott Henderson

🚧 Sebastian Alvis
Sebastian Alvis

📖 🚇 Shashank Bhushan
Shashank Bhushan

💡 - Tian Li
Tian Li

🐛 💻 📖 💡 🤔 👀 ⚠️ 🔧 + Tian Li
Tian Li

🐛 💻 📖 💡 🤔 👀 ⚠️ 🔧 Tom Johnson
Tom Johnson

📖 🚇 Tyler Sutterley
Tyler Sutterley

📖 💻 🤔 💬 🛡️ ⚠️ Wei Ji
Wei Ji

🐛 💻 📖 💡 🤔 🚇 🚧 🧑‍🏫 💬 👀 ⚠️ 📢 Wilson Sauthoff
Wilson Sauthoff

👀 Zach Fair
Zach Fair

🐛 💻 📖 🤔 💬 👀 alexdibella
alexdibella

🐛 🤔 💻 - bidhya
bidhya

💡 + bidhya
bidhya

💡 learn2phoenix
learn2phoenix

💻 liuzheng-arctic
liuzheng-arctic

📖 🐛 💻 🤔 👀 🔧 💡 nitin-ravinder
nitin-ravinder

🐛 👀 ravindraK08
ravindraK08

👀 smithb
smithb

🤔 tedmaksym
tedmaksym

🤔 - trevorskaggs
trevorskaggs

🐛 💻 + trevorskaggs
trevorskaggs

🐛 💻 trey-stafford
trey-stafford

💻 🤔 🚧 👀 💬 - + - + This project follows the `all-contributors `_ specification. Contributions of any kind welcome! From a56a9c8864ca23d73850f9e25902c80d75634120 Mon Sep 17 00:00:00 2001 From: "allcontributors[bot]" <46447321+allcontributors[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 12:07:55 -0400 Subject: [PATCH 08/14] docs: add jpswinski as a contributor for review (#461) Co-authored-by: allcontributors[bot] <46447321+allcontributors[bot]@users.noreply.github.com> Co-authored-by: Jessica Scheick --- .all-contributorsrc | 3 ++- CONTRIBUTORS.rst | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.all-contributorsrc b/.all-contributorsrc index 3b321715a..85b5486b9 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -382,7 +382,8 @@ "avatar_url": "https://avatars.githubusercontent.com/u/54070345?v=4", "profile": "https://github.com/jpswinski", "contributions": [ - "code" + "code", + "review" ] }, { diff --git a/CONTRIBUTORS.rst b/CONTRIBUTORS.rst index be362bb28..337ff6661 100644 --- a/CONTRIBUTORS.rst +++ b/CONTRIBUTORS.rst @@ -23,7 +23,7 @@ Thanks goes to these wonderful people (`emoji key Fernando Perez
Fernando Perez

🎨 💼 🤔 - JP Swinski
JP Swinski

💻 + JP Swinski
JP Swinski

💻 👀 Jessica
Jessica

🐛 💻 🖋 📖 🎨 💡 🤔 🚧 🧑‍🏫 📆 💬 👀 Joachim Meyer
Joachim Meyer

🧑‍🏫 🚧 Kelsey Bisson
Kelsey Bisson

🐛 💻 📖 🤔 💡 🤔 🧑‍🏫 💬 👀 From bdcc9bddd81060ced54ffbd323b0dc635ee368de Mon Sep 17 00:00:00 2001 From: "allcontributors[bot]" <46447321+allcontributors[bot]@users.noreply.github.com> Date: Fri, 27 Oct 2023 14:13:58 -0400 Subject: [PATCH 09/14] docs: add whyjz as a contributor for tutorial (#462) Co-authored-by: allcontributors[bot] <46447321+allcontributors[bot]@users.noreply.github.com> Co-authored-by: Jessica Scheick --- .all-contributorsrc | 9 +++++++++ CONTRIBUTORS.rst | 5 +++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/.all-contributorsrc b/.all-contributorsrc index 85b5486b9..6b24eac03 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -439,6 +439,15 @@ "test", "tutorial" ] + }, + { + "login": "whyjz", + "name": "Whyjay Zheng", + "avatar_url": "https://avatars.githubusercontent.com/u/19339926?v=4", + "profile": "https://whyjz.github.io/", + "contributions": [ + "tutorial" + ] } ], "contributorsPerLine": 7, diff --git a/CONTRIBUTORS.rst b/CONTRIBUTORS.rst index 337ff6661..1fd8bab42 100644 --- a/CONTRIBUTORS.rst +++ b/CONTRIBUTORS.rst @@ -44,20 +44,21 @@ Thanks goes to these wonderful people (`emoji key Tom Johnson
Tom Johnson

📖 🚇 Tyler Sutterley
Tyler Sutterley

📖 💻 🤔 💬 🛡️ ⚠️ Wei Ji
Wei Ji

🐛 💻 📖 💡 🤔 🚇 🚧 🧑‍🏫 💬 👀 ⚠️ 📢 + Whyjay Zheng
Whyjay Zheng

Wilson Sauthoff
Wilson Sauthoff

👀 Zach Fair
Zach Fair

🐛 💻 📖 🤔 💬 👀 - alexdibella
alexdibella

🐛 🤔 💻 + alexdibella
alexdibella

🐛 🤔 💻 bidhya
bidhya

💡 learn2phoenix
learn2phoenix

💻 liuzheng-arctic
liuzheng-arctic

📖 🐛 💻 🤔 👀 🔧 💡 nitin-ravinder
nitin-ravinder

🐛 👀 ravindraK08
ravindraK08

👀 smithb
smithb

🤔 - tedmaksym
tedmaksym

🤔 + tedmaksym
tedmaksym

🤔 trevorskaggs
trevorskaggs

🐛 💻 trey-stafford
trey-stafford

💻 🤔 🚧 👀 💬 From fb90b0c6b18e96379ab2468b3aa2f3f4d27de6ee Mon Sep 17 00:00:00 2001 From: Jessica Scheick Date: Thu, 2 Nov 2023 10:04:30 -0400 Subject: [PATCH 10/14] add newest icepyx citations (#455) --- doc/source/tracking/citations.rst | 2 ++ doc/source/tracking/icepyx_pubs.bib | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/doc/source/tracking/citations.rst b/doc/source/tracking/citations.rst index b31132be8..bf5672587 100644 --- a/doc/source/tracking/citations.rst +++ b/doc/source/tracking/citations.rst @@ -49,6 +49,8 @@ Research that utilizes icepyx for ICESat-2 data .. bibliography:: icepyx_pubs.bib :style: mystyle + Freer2023 + Idestrom2023 Shean2023 Eidam2022 Leeuwen:2022 diff --git a/doc/source/tracking/icepyx_pubs.bib b/doc/source/tracking/icepyx_pubs.bib index a1d945c01..d13c9653f 100644 --- a/doc/source/tracking/icepyx_pubs.bib +++ b/doc/source/tracking/icepyx_pubs.bib @@ -183,6 +183,30 @@ @inProceedings{Fernando:2021 } +@Article{Freer2023, +AUTHOR = {Freer, B. I. D. and Marsh, O. J. and Hogg, A. E. and Fricker, H. A. and Padman, L.}, +TITLE = {Modes of {Antarctic} tidal grounding line migration revealed by {Ice, Cloud, and land Elevation Satellite-2 (ICESat-2)} laser altimetry}, +JOURNAL = {The Cryosphere}, +VOLUME = {17}, +YEAR = {2023}, +NUMBER = {9}, +PAGES = {4079--4101}, +URL = {https://tc.copernicus.org/articles/17/4079/2023/}, +DOI = {10.5194/tc-17-4079-2023} +} + + +@mastersthesis{Idestrom2023, + author = {Petter Idestr\"{o}m}, + title = {Remote Sensing of Cryospheric Surfaces: Small Scale Surface Roughness Signatures in Satellite Altimetry Data}, + school = {Ume\aa University}, + year = {2023}, + address = {Sweden}, + month = {Sept.}, + url = {https://www.diva-portal.org/smash/get/diva2:1801057/FULLTEXT01.pdf} +} + + @misc{Leeuwen:2022, author = {van Leeuwen, Gijs}, title = {The automated retrieval of supraglacial lake depth and extent from {ICESat-2} photon clouds leveraging {DBSCAN} clustering}, From d5747fae827ae734e9fa329371c2bde3fa3995bd Mon Sep 17 00:00:00 2001 From: Rachel Wegener <35503632+rwegener2@users.noreply.github.com> Date: Tue, 7 Nov 2023 07:17:42 -0500 Subject: [PATCH 11/14] Variables as an independent class (#451) Refactor Variables class to be user facing functionality --- .../IS2_data_access2-subsetting.ipynb | 42 +- .../IS2_data_variables.ipynb | 351 ++++++++++++- .../documentation/classes_dev_uml.svg | 497 +++++++++--------- .../documentation/classes_user_uml.svg | 33 +- .../user_guide/documentation/components.rst | 8 - .../user_guide/documentation/icepyx.rst | 1 + .../documentation/packages_user_uml.svg | 60 ++- .../user_guide/documentation/variables.rst | 25 + icepyx/__init__.py | 1 + icepyx/core/is2ref.py | 53 +- icepyx/core/query.py | 51 +- icepyx/core/read.py | 59 ++- icepyx/core/variables.py | 160 +++--- 13 files changed, 880 insertions(+), 461 deletions(-) create mode 100644 doc/source/user_guide/documentation/variables.rst diff --git a/doc/source/example_notebooks/IS2_data_access2-subsetting.ipynb b/doc/source/example_notebooks/IS2_data_access2-subsetting.ipynb index 89247de5f..3803b9fd6 100644 --- a/doc/source/example_notebooks/IS2_data_access2-subsetting.ipynb +++ b/doc/source/example_notebooks/IS2_data_access2-subsetting.ipynb @@ -51,7 +51,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "Create a query object and log in to Earthdata\n", "\n", @@ -83,7 +85,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] 
+ }, "source": [ "## Discover Subsetting Options\n", "\n", @@ -108,7 +112,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "By default, spatial and temporal subsetting based on your initial inputs is applied to your order unless you specify `subset=False` to `order_granules()` or `download_granules()` (which calls `order_granules` under the hood if you have not already placed your order) functions.\n", "Additional subsetting options must be specified as keyword arguments to the order/download functions.\n", @@ -118,7 +124,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "### _Why do I have to provide spatial bounds to icepyx even if I don't use them to subset my data order?_\n", "\n", @@ -132,7 +140,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "## About Data Variables in a query object\n", "\n", @@ -145,7 +155,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "### Determine what variables are available for your data product\n", "There are multiple ways to get a complete list of available variables.\n", @@ -159,7 +171,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "region_a.order_vars.avail()" @@ -167,7 +181,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "By passing the boolean `options=True` to the `avail` method, you can obtain lists of unique possible variable inputs (var_list inputs) and path subdirectory inputs (keyword_list and beam_list inputs) for your data product. These can be helpful for building your wanted variable list." 
] @@ -175,7 +191,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "region_a.order_vars.avail(options=True)" @@ -353,9 +371,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "icepyx-dev", "language": "python", - "name": "python3" + "name": "icepyx-dev" }, "language_info": { "codemirror_mode": { @@ -367,7 +385,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/doc/source/example_notebooks/IS2_data_variables.ipynb b/doc/source/example_notebooks/IS2_data_variables.ipynb index 3ac1f99fe..78a250789 100644 --- a/doc/source/example_notebooks/IS2_data_variables.ipynb +++ b/doc/source/example_notebooks/IS2_data_variables.ipynb @@ -2,7 +2,9 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "# ICESat-2's Nested Variables\n", "\n", @@ -13,10 +15,10 @@ "\n", "A given ICESat-2 product may have over 200 variable + path combinations.\n", "icepyx includes a custom `Variables` module that is \"aware\" of the ATLAS sensor and how the ICESat-2 data products are stored.\n", - "The module can be accessed independently, but is optimally used as a component of a `Query` object (Case 1) or `Read` object (Case 2).\n", + "The module can be accessed independently, and can also be accessed as a component of a `Query` object or `Read` object.\n", "\n", - "This notebook illustrates in detail how the `Variables` module behaves using a `Query` data access example.\n", - "However, module usage is analogous through an icepyx ICESat-2 `Read` object.\n", + "This notebook illustrates in detail how the `Variables` module behaves. We use the module independently and also show how powerful it is directly in the icepyx workflow using a `Query` data access example.\n", + "Module usage using `Query` is analogous through an icepyx ICESat-2 `Read` object.\n", "More detailed example workflows specifically for the [query](https://icepyx.readthedocs.io/en/latest/example_notebooks/IS2_data_access.html) and [read](https://icepyx.readthedocs.io/en/latest/example_notebooks/IS2_data_read-in.html) tools within icepyx are available as separate Jupyter Notebooks.\n", "\n", "Questions? Be sure to check out the FAQs throughout this notebook, indicated as italic headings." @@ -24,11 +26,15 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "### _Why do ICESat-2 products need a custom variable manager?_\n", "\n", "_It can be confusing and cumbersome to comb through the 200+ variable and path combinations contained in ICESat-2 data products._\n", + "_An hdf5 file is built like a folder with files in it. Opening an ICESat-2 file can be like opening a new folder with over 200 files in it and manually searching for only ones you want!_\n", + "\n", "_The icepyx `Variables` module makes it easier for users to quickly find and extract the specific variables they would like to work with across multiple beams, keywords, and variables and provides reader-friendly formatting to browse variables._\n", "_A future development goal for `icepyx` includes developing an interactive widget to further improve the user experience._\n", "_For data read-in, additional tools are available to target specific beam characteristics (e.g. 
strong versus weak beams)._" @@ -38,35 +44,245 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Some technical details about the Variables module\n", - "For those eager to push the limits or who want to know more implementation details...\n", + "Import packages, including icepyx" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import icepyx as ipx\n", + "from pprint import pprint" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "user_expressions": [] + }, + "source": [ + "## Creating or Accessing ICESat-2 Variables" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "user_expressions": [] + }, + "source": [ + "There are three ways to create or access an ICESat-2 Variables object in icepyx:\n", + "1. Access via the `.order_vars` property of a Query object\n", + "2. Access via the `.vars` property of a Read object\n", + "3. Create a stand-alone ICESat-2 Variables object using a local file or a product name\n", "\n", - "The only required input to the `Variables` module is `vartype`.\n", - "`vartype` has two acceptible string values, 'order' and 'file'.\n", - "If you use the module as shown in icepyx examples (namely through a `Read` or `Query` object), then this flag will be passed automatically.\n", - "It simply tells the software how to generate the list of possible variable values - either by pinging NSIDC for a list of available variables (`query`) or from the user-supplied file (`read`)." + "An example of each of these is shown below." ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ - "Import packages, including icepyx" + "### 1. Access `Variables` via the `.order_vars` property of a Query object" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "import icepyx as ipx\n", - "from pprint import pprint" + "region_a = ipx.Query('ATL06',[-55, 68, -48, 71],['2019-02-22','2019-02-28'], \\\n", + " start_time='00:00:00', end_time='23:59:59')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Accessing Variables\n", + "region_a.order_vars" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Showing the variable paths\n", + "region_a.order_vars.avail()" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [], + "user_expressions": [] + }, + "source": [ + "### 2. Access via the `.vars` property of a Read object" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "path_root = '/full/path/to/your/data/'\n", + "reader = ipx.Read(path_root)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Accessing Variables\n", + "reader.vars" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Showing the variable paths\n", + "# reader.vars.avail()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "user_expressions": [] + }, + "source": [ + "### 3. Create a stand-alone Variables object\n", + "\n", + "You can also generate an independent Variables object. This can be done using either:\n", + "1. 
The filepath to a file you'd like a variables list for\n", + "2. The product name (and optionally version) of a an ICESat-2 product" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "user_expressions": [] + }, + "source": [ + "Create a variables object from a filepath:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "filepath = '/full/path/to/your/data.h5'\n", + "v = ipx.Variables(path=filepath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# v.avail()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "user_expressions": [] + }, + "source": [ + "Create a variables object from a product. The version argument is optional." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "v = ipx.Variables(product='ATL03')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# v.avail()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "v = ipx.Variables(product='ATL03', version='004')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# v.avail()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "user_expressions": [] + }, + "source": [ + "Now that you know how to create or access Variables the remainder of this notebook showcases the functions availble for building and modifying variables lists. Remember, the example shown below uses a Query object, but the same methods are available if you are using a Read object or a Variables object." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "user_expressions": [] + }, "source": [ "## Interacting with ICESat-2 Data Variables\n", "\n", @@ -88,7 +304,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "Create a query object and log in to Earthdata\n", "\n", @@ -134,7 +352,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "### ICESat-2 data variables\n", "\n", @@ -157,7 +377,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "To increase readability, you can use built in functions to show the 200+ variable + path combinations as a dictionary where the keys are variable names and the values are the paths to that variable.\n", "`region_a.order_vars.parse_var_list(region_a.order_vars.avail())` will return a dictionary of variable:paths key:value pairs." @@ -174,7 +396,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "By passing the boolean `options=True` to the `avail` method, you can obtain lists of unique possible variable inputs (var_list inputs) and path subdirectory inputs (keyword_list and beam_list inputs) for your data product. These can be helpful for building your wanted variable list." ] @@ -188,6 +412,30 @@ "region_a.order_vars.avail(options=True)" ] }, + { + "cell_type": "markdown", + "metadata": { + "user_expressions": [] + }, + "source": [ + "```{admonition} Remember\n", + "You can run these same methods no matter how you created or accessed your ICESat-2 Variables. 
So the methods in this section could be equivalently be accessed using a Read object, or by directly accessing a file on your computer:\n", + "\n", + "```\n", + "```python\n", + "# Using a Read object\n", + "reader.vars.avail()\n", + "reader.vars.parse_var_list(reader.vars.avail())\n", + "reader.vars.avail(options=True)\n", + "\n", + "# Using a file on your computer\n", + "v = Variables(path='/my/file.h5')\n", + "v.avail()\n", + "v.parse_var_list(v.avail())\n", + "v.avail(options=True)\n", + "```\n" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -228,7 +476,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "The keywords available for this product are shown in the error message upon entering a blank keyword_list, as seen in the next cell." ] @@ -745,13 +995,62 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "#### With a `Read` object\n", "Calling the `load()` method on your `Read` object will automatically look for your wanted variable list and use it.\n", "Please see the [read-in example Jupyter Notebook](https://icepyx.readthedocs.io/en/latest/example_notebooks/IS2_data_read-in.html) for a complete example of this usage.\n" ] }, + { + "cell_type": "markdown", + "metadata": { + "user_expressions": [] + }, + "source": [ + "#### With a local filepath\n", + "\n", + "One of the benefits of using a local filepath in variables is that it allows you to easily inspect the variables that are available in your file. Once you have a variable of interest from the `avail` list, you could read that variable in with another library, such as xarray. The example below demonstrates this assuming an ATL06 ICESat-2 file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "filepath = '/full/path/to/my/ATL06_file.h5'\n", + "v = ipx.Variables(path=filepath)\n", + "v.avail()\n", + "# Browse paths and decide you need `gt1l/land_ice_segments/`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import xarray as xr\n", + "\n", + "xr.open_dataset(filepath, group='gt1l/land_ice_segments/', engine='h5netcdf')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "user_expressions": [] + }, + "source": [ + "You'll notice in this workflow you are limited to viewing data only within a particular group. Icepyx also provides functionality for merging variables within or even across files. See the [read-in example Jupyter Notebook](https://icepyx.readthedocs.io/en/latest/example_notebooks/IS2_data_read-in.html) for more details about these features of icepyx." 
+ ] + }, { "cell_type": "markdown", "metadata": {}, @@ -763,9 +1062,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "icepyx-dev", "language": "python", - "name": "python3" + "name": "icepyx-dev" }, "language_info": { "codemirror_mode": { @@ -777,7 +1076,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/doc/source/user_guide/documentation/classes_dev_uml.svg b/doc/source/user_guide/documentation/classes_dev_uml.svg index 0cd08c9e9..765e0d531 100644 --- a/doc/source/user_guide/documentation/classes_dev_uml.svg +++ b/doc/source/user_guide/documentation/classes_dev_uml.svg @@ -4,328 +4,329 @@ - - + + classes_dev_uml - + icepyx.core.auth.AuthenticationError - -AuthenticationError - - - + +AuthenticationError + + + icepyx.core.exceptions.DeprecationError - -DeprecationError - - - + +DeprecationError + + + icepyx.core.auth.EarthdataAuthMixin - -EarthdataAuthMixin - -_auth : Auth, NoneType -_s3_initial_ts : NoneType, datetime -_s3login_credentials : NoneType, dict -_session : NoneType -auth -s3login_credentials -session - -__init__(auth) -__str__() -earthdata_login(uid, email, s3token): None + +EarthdataAuthMixin + +_auth : NoneType +_s3_initial_ts : NoneType, datetime +_s3login_credentials : NoneType +_session : NoneType +auth +s3login_credentials +session + +__init__(auth) +__str__() +earthdata_login(uid, email, s3token): None icepyx.core.query.GenQuery - -GenQuery - -_spatial -_temporal -dates -end_time -spatial -spatial_extent -start_time -temporal - -__init__(spatial_extent, date_range, start_time, end_time) -__str__() + +GenQuery + +_spatial +_temporal +dates +end_time +spatial +spatial_extent +start_time +temporal + +__init__(spatial_extent, date_range, start_time, end_time) +__str__() icepyx.core.granules.Granules - -Granules - -avail : list -orderIDs : list - -__init__ -() -download(verbose, path, session, restart) -get_avail(CMRparams, reqparams, cloud) -place_order(CMRparams, reqparams, subsetparams, verbose, subset, session, geom_filepath) + +Granules + +avail : list +orderIDs : list + +__init__ +() +download(verbose, path, session, restart) +get_avail(CMRparams, reqparams, cloud) +place_order(CMRparams, reqparams, subsetparams, verbose, subset, session, geom_filepath) icepyx.core.query.Query - -Query - -CMRparams -_CMRparams -_about_product -_cust_options : dict -_cycles : list -_file_vars -_granules -_order_vars -_prod : NoneType, str -_readable_granule_name : list -_reqparams -_source : str -_subsetparams : NoneType -_tracks : list -_version -cycles -dataset -file_vars -granules -order_vars -product -product_version -reqparams -tracks - -__init__(product, spatial_extent, date_range, start_time, end_time, version, cycles, tracks, files, auth) -__str__() -avail_granules(ids, cycles, tracks, cloud) -download_granules(path, verbose, subset, restart) -latest_version() -order_granules(verbose, subset, email) -product_all_info() -product_summary_info() -show_custom_options(dictview) -subsetparams() -visualize_elevation() -visualize_spatial_extent() + +Query + +CMRparams +_CMRparams +_about_product +_cust_options : dict +_cycles : list +_file_vars +_granules +_order_vars +_prod : NoneType, str +_readable_granule_name : list +_reqparams +_source : str +_subsetparams : NoneType +_tracks : list +_version +cycles +dataset +file_vars +granules +order_vars +product +product_version +reqparams +tracks + +__init__(product, spatial_extent, 
date_range, start_time, end_time, version, cycles, tracks, files, auth) +__str__() +avail_granules(ids, cycles, tracks, cloud) +download_granules(path, verbose, subset, restart) +latest_version() +order_granules(verbose, subset, email) +product_all_info() +product_summary_info() +show_custom_options(dictview) +subsetparams() +visualize_elevation() +visualize_spatial_extent() icepyx.core.granules.Granules->icepyx.core.query.Query - - -_granules + + +_granules icepyx.core.granules.Granules->icepyx.core.query.Query - - -_granules + + +_granules icepyx.core.icesat2data.Icesat2Data - -Icesat2Data - - -__init__() + +Icesat2Data + + +__init__() icepyx.core.exceptions.NsidcQueryError - -NsidcQueryError - -errmsg -msgtxt : str - -__init__(errmsg, msgtxt) -__str__() + +NsidcQueryError + +errmsg +msgtxt : str + +__init__(errmsg, msgtxt) +__str__() icepyx.core.exceptions.QueryError - -QueryError - - - + +QueryError + + + icepyx.core.exceptions.NsidcQueryError->icepyx.core.exceptions.QueryError - - + + icepyx.core.APIformatting.Parameters - -Parameters - -_fmted_keys : NoneType, dict -_poss_keys : dict -_reqtype : NoneType, str -fmted_keys -partype -poss_keys - -__init__(partype, values, reqtype) -_check_valid_keys() -_get_possible_keys() -build_params() -check_req_values() -check_values() + +Parameters + +_fmted_keys : NoneType, dict +_poss_keys : dict +_reqtype : NoneType, str +fmted_keys +partype +poss_keys + +__init__(partype, values, reqtype) +_check_valid_keys() +_get_possible_keys() +build_params() +check_req_values() +check_values() icepyx.core.APIformatting.Parameters->icepyx.core.query.Query - - -_CMRparams + + +_CMRparams icepyx.core.APIformatting.Parameters->icepyx.core.query.Query - - -_reqparams + + +_reqparams icepyx.core.APIformatting.Parameters->icepyx.core.query.Query - - -_subsetparams + + +_subsetparams icepyx.core.APIformatting.Parameters->icepyx.core.query.Query - - -_subsetparams + + +_subsetparams icepyx.core.query.Query->icepyx.core.auth.EarthdataAuthMixin - - + + icepyx.core.query.Query->icepyx.core.query.GenQuery - - + + icepyx.core.read.Read - -Read - -_filelist : NoneType, list -_out_obj : Dataset -_product : NoneType, str -_read_vars -filelist -product -vars - -__init__(data_source, product, filename_pattern, catalog, glob_kwargs, out_obj_type) -_add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict) -_build_dataset_template(file) -_build_single_file_dataset(file, groups_list) -_check_source_for_pattern(source, filename_pattern) -_combine_nested_vars(is2ds, ds, grp_path, wanted_dict) -_extract_product(filepath) -_read_single_grp(file, grp_path) -load() + +Read + +_filelist : NoneType, list +_out_obj : Dataset +_product : NoneType, str +_read_vars +filelist +product +vars + +__init__(data_source, product, filename_pattern, catalog, glob_kwargs, out_obj_type) +_add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict) +_build_dataset_template(file) +_build_single_file_dataset(file, groups_list) +_check_source_for_pattern(source, filename_pattern) +_combine_nested_vars(is2ds, ds, grp_path, wanted_dict) +_read_single_grp(file, grp_path) +load() icepyx.core.spatial.Spatial - -Spatial - -_ext_type : str -_gdf_spat : GeoDataFrame -_geom_file : NoneType -_spatial_ext -_xdateln -extent -extent_as_gdf -extent_file -extent_type - -__init__(spatial_extent) -__str__() -fmt_for_CMR() -fmt_for_EGI() + +Spatial + +_ext_type : str +_gdf_spat : GeoDataFrame +_geom_file : NoneType +_spatial_ext +_xdateln +extent +extent_as_gdf +extent_file +extent_type + 
+__init__(spatial_extent) +__str__() +fmt_for_CMR() +fmt_for_EGI() icepyx.core.spatial.Spatial->icepyx.core.query.GenQuery - - -_spatial + + +_spatial icepyx.core.spatial.Spatial->icepyx.core.query.GenQuery - - -_spatial + + +_spatial icepyx.core.temporal.Temporal - -Temporal - -_end : datetime -_start : datetime -end -start - -__init__(date_range, start_time, end_time) -__str__() + +Temporal + +_end : datetime +_start : datetime +end +start + +__init__(date_range, start_time, end_time) +__str__() icepyx.core.temporal.Temporal->icepyx.core.query.GenQuery - - -_temporal + + +_temporal icepyx.core.variables.Variables - -Variables - -_avail : NoneType, list -_vartype -_version : NoneType -path : NoneType -product : NoneType -wanted : NoneType, dict + +Variables + +_avail : NoneType, list +_path : NoneType +_product : NoneType, str +_version +path +product +version +wanted : NoneType, dict -__init__(vartype, avail, wanted, product, version, path, auth) +__init__(vartype, path, product, version, avail, wanted, auth) _check_valid_lists(vgrp, allpaths, var_list, beam_list, keyword_list) _get_combined_list(beam_list, keyword_list) _get_sum_varlist(var_list, all_vars, defaults) @@ -339,57 +340,57 @@ icepyx.core.variables.Variables->icepyx.core.auth.EarthdataAuthMixin - - + + icepyx.core.variables.Variables->icepyx.core.query.Query - - -_order_vars + + +_order_vars icepyx.core.variables.Variables->icepyx.core.query.Query - - -_order_vars + + +_order_vars icepyx.core.variables.Variables->icepyx.core.query.Query - - -_file_vars + + +_file_vars icepyx.core.variables.Variables->icepyx.core.read.Read - - -_read_vars + + +_read_vars icepyx.core.visualization.Visualize - -Visualize - -bbox : list -cycles : NoneType -date_range : NoneType -product : NoneType, str -tracks : NoneType - -__init__(query_obj, product, spatial_extent, date_range, cycles, tracks) -generate_OA_parameters(): list -grid_bbox(binsize): list -make_request(base_url, payload) -parallel_request_OA(): da.array -query_icesat2_filelist(): tuple -request_OA_data(paras): da.array -viz_elevation(): (hv.DynamicMap, hv.Layout) + +Visualize + +bbox : list +cycles : NoneType +date_range : NoneType +product : NoneType, str +tracks : NoneType + +__init__(query_obj, product, spatial_extent, date_range, cycles, tracks) +generate_OA_parameters(): list +grid_bbox(binsize): list +make_request(base_url, payload) +parallel_request_OA(): da.array +query_icesat2_filelist(): tuple +request_OA_data(paras): da.array +viz_elevation(): (hv.DynamicMap, hv.Layout) diff --git a/doc/source/user_guide/documentation/classes_user_uml.svg b/doc/source/user_guide/documentation/classes_user_uml.svg index a9c116469..59b8e8e6f 100644 --- a/doc/source/user_guide/documentation/classes_user_uml.svg +++ b/doc/source/user_guide/documentation/classes_user_uml.svg @@ -259,49 +259,50 @@ icepyx.core.variables.Variables - -Variables - -path : NoneType -product : NoneType -wanted : NoneType, dict - -append(defaults, var_list, beam_list, keyword_list) -avail(options, internal) -parse_var_list(varlist, tiered, tiered_vars) -remove(all, var_list, beam_list, keyword_list) + +Variables + +path +product +version +wanted : NoneType, dict + +append(defaults, var_list, beam_list, keyword_list) +avail(options, internal) +parse_var_list(varlist, tiered, tiered_vars) +remove(all, var_list, beam_list, keyword_list) icepyx.core.variables.Variables->icepyx.core.auth.EarthdataAuthMixin - + icepyx.core.variables.Variables->icepyx.core.query.Query - + _order_vars 
icepyx.core.variables.Variables->icepyx.core.query.Query - + _order_vars icepyx.core.variables.Variables->icepyx.core.query.Query - + _file_vars icepyx.core.variables.Variables->icepyx.core.read.Read - + _read_vars diff --git a/doc/source/user_guide/documentation/components.rst b/doc/source/user_guide/documentation/components.rst index b4b658385..dea41a970 100644 --- a/doc/source/user_guide/documentation/components.rst +++ b/doc/source/user_guide/documentation/components.rst @@ -67,14 +67,6 @@ validate\_inputs :undoc-members: :show-inheritance: -variables ---------- - -.. automodule:: icepyx.core.variables - :members: - :undoc-members: - :show-inheritance: - visualize --------- diff --git a/doc/source/user_guide/documentation/icepyx.rst b/doc/source/user_guide/documentation/icepyx.rst index 56ff7f496..a8a9a6f8e 100644 --- a/doc/source/user_guide/documentation/icepyx.rst +++ b/doc/source/user_guide/documentation/icepyx.rst @@ -23,4 +23,5 @@ Diagrams are updated automatically after a pull request (PR) is approved and bef query read quest + variables components diff --git a/doc/source/user_guide/documentation/packages_user_uml.svg b/doc/source/user_guide/documentation/packages_user_uml.svg index 44a041c77..8d8cf0dc9 100644 --- a/doc/source/user_guide/documentation/packages_user_uml.svg +++ b/doc/source/user_guide/documentation/packages_user_uml.svg @@ -4,11 +4,11 @@ - + packages_user_uml - + icepyx.core @@ -24,14 +24,14 @@ icepyx.core.auth - -icepyx.core.auth + +icepyx.core.auth icepyx.core.exceptions - -icepyx.core.exceptions + +icepyx.core.exceptions @@ -42,14 +42,14 @@ icepyx.core.icesat2data - -icepyx.core.icesat2data + +icepyx.core.icesat2data icepyx.core.is2ref - -icepyx.core.is2ref + +icepyx.core.is2ref @@ -60,8 +60,8 @@ icepyx.core.query->icepyx.core.auth - - + + @@ -96,44 +96,50 @@ icepyx.core.read - -icepyx.core.read + +icepyx.core.read icepyx.core.read->icepyx.core.exceptions - - + + icepyx.core.read->icepyx.core.variables - - + + icepyx.core.spatial - -icepyx.core.spatial + +icepyx.core.spatial icepyx.core.temporal - -icepyx.core.temporal + +icepyx.core.temporal icepyx.core.validate_inputs - -icepyx.core.validate_inputs + +icepyx.core.validate_inputs icepyx.core.variables->icepyx.core.auth - - + + + + + +icepyx.core.variables->icepyx.core.exceptions + + diff --git a/doc/source/user_guide/documentation/variables.rst b/doc/source/user_guide/documentation/variables.rst new file mode 100644 index 000000000..e147bfd64 --- /dev/null +++ b/doc/source/user_guide/documentation/variables.rst @@ -0,0 +1,25 @@ +Variables Class +================= + +.. currentmodule:: icepyx + + +Constructor +----------- + +.. autosummary:: + :toctree: ../../_icepyx/ + + Variables + + +Methods +------- + +.. 
diff --git a/icepyx/__init__.py b/icepyx/__init__.py
index 3d92e2e60..40ea9e1ec 100644
--- a/icepyx/__init__.py
+++ b/icepyx/__init__.py
@@ -1,5 +1,6 @@
 from icepyx.core.query import Query, GenQuery
 from icepyx.core.read import Read
 from icepyx.quest.quest import Quest
+from icepyx.core.variables import Variables
 
 from _icepyx_version import version as __version__
diff --git a/icepyx/core/is2ref.py b/icepyx/core/is2ref.py
index 5faaef110..a90c8fafa 100644
--- a/icepyx/core/is2ref.py
+++ b/icepyx/core/is2ref.py
@@ -1,3 +1,4 @@
+import h5py
 import json
 import numpy as np
 import requests
@@ -110,7 +111,11 @@ def _get_custom_options(session, product, version):
     # reformatting
     formats = [Format.attrib for Format in root.iter("Format")]
     format_vals = [formats[i]["value"] for i in range(len(formats))]
-    format_vals.remove("")
+    try:
+        format_vals.remove("")
+    except ValueError:
+        # list.remove raises ValueError when the value is absent; ATL23 does not have an empty value
+        pass
     cust_options.update({"fileformats": format_vals})
 
     # reprojection only applicable on ICESat-2 L3B products.
@@ -324,3 +329,49 @@ def gt2spot(gt, sc_orient):
         raise ValueError("Could not compute the spot number.")
 
     return np.uint8(spot)
+
+def latest_version(product):
+    """
+    Determine the most recent version available for the given product.
+
+    Examples
+    --------
+    >>> latest_version('ATL03')
+    '006'
+    """
+    _about_product = about_product(product)
+    return max(
+        [entry["version_id"] for entry in _about_product["feed"]["entry"]]
+    )
+
+def extract_product(filepath):
+    """
+    Read the product type from the metadata of the file. Return the product as a string.
+    """
+    with h5py.File(filepath, 'r') as f:
+        try:
+            product = f.attrs['short_name']
+            if isinstance(product, bytes):
+                # For most products the short name is stored in a bytes string
+                product = product.decode()
+            elif isinstance(product, np.ndarray):
+                # ATL14 saves the short_name as an array ['ATL14']
+                product = product[0]
+            product = _validate_product(product)
+        except KeyError:
+            raise ValueError('Unable to parse the product name from file metadata')
+    return product
+
+def extract_version(filepath):
+    """
+    Read the version from the metadata of the file. Return the version as a string.
+    """
+    with h5py.File(filepath, 'r') as f:
+        try:
+            version = f['METADATA']['DatasetIdentification'].attrs['VersionID']
+            if isinstance(version, np.ndarray):
+                # ATL14 stores the version as an array ['00x']
+                version = version[0]
+        except KeyError:
+            raise ValueError('Unable to parse the version from file metadata')
+    return version
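[Editor's aside: the three helpers added above are small enough to exercise directly (a hedged sketch; the granule path is hypothetical, and latest_version needs network access to CMR):

    import icepyx.core.is2ref as is2ref

    # queries CMR metadata for the product and keeps the highest version_id
    is2ref.latest_version('ATL06')                 # e.g. '006'

    # both extractors open the HDF5 file and raise ValueError if the
    # expected metadata attribute is missing
    is2ref.extract_product('/path/to/granule.h5')  # e.g. 'ATL06'
    is2ref.extract_version('/path/to/granule.h5')  # e.g. '006'
]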
diff --git a/icepyx/core/query.py b/icepyx/core/query.py
index 3459fd132..8700d5655 100644
--- a/icepyx/core/query.py
+++ b/icepyx/core/query.py
@@ -12,6 +12,7 @@
 import icepyx.core.APIformatting as apifmt
 from icepyx.core.auth import EarthdataAuthMixin
 import icepyx.core.granules as granules
+
 # QUESTION: why doesn't from granules import Granules work, since granules=icepyx.core.granules?
 from icepyx.core.granules import Granules
 import icepyx.core.is2ref as is2ref
@@ -432,7 +433,7 @@ def __init__(
 
         super().__init__(spatial_extent, date_range, start_time, end_time, **kwargs)
 
-        self._version = val.prod_version(self.latest_version(), version)
+        self._version = val.prod_version(is2ref.latest_version(self._prod), version)
 
         # build list of available CMR parameters if reducing by cycle or RGT
         # or a list of explicitly named files (full or partial names)
@@ -448,6 +449,7 @@ def __init__(
 
         # initialize authentication properties
         EarthdataAuthMixin.__init__(self)
+
 
     # ----------------------------------------------------------------------
     # Properties
@@ -646,6 +648,27 @@ def subsetparams(self, **kwargs):
         if self._subsetparams == None and not kwargs:
             return {}
         else:
+            # If the user has supplied a subset list of variables, append the
+            # icepyx required variables to the Coverage dict
+            if "Coverage" in kwargs.keys():
+                var_list = [
+                    "orbit_info/sc_orient",
+                    "orbit_info/sc_orient_time",
+                    "ancillary_data/atlas_sdp_gps_epoch",
+                    "orbit_info/cycle_number",
+                    "orbit_info/rgt",
+                    "ancillary_data/data_start_utc",
+                    "ancillary_data/data_end_utc",
+                    "ancillary_data/granule_start_utc",
+                    "ancillary_data/granule_end_utc",
+                    "ancillary_data/start_delta_time",
+                    "ancillary_data/end_delta_time",
+                ]
+                # Add any variables from var_list to Coverage that are not already included
+                for var in var_list:
+                    if var not in kwargs["Coverage"].keys():
+                        kwargs["Coverage"][var.split("/")[-1]] = [var]
+
             if self._subsetparams == None:
                 self._subsetparams = apifmt.Parameters("subset")
             if self._spatial._geom_file is not None:
@@ -688,17 +711,16 @@ def order_vars(self):
             # DevGoal: check for active session here
             if hasattr(self, "_cust_options"):
                 self._order_vars = Variables(
-                    self._source,
-                    auth = self.auth,
                     product=self.product,
+                    version=self._version,
                     avail=self._cust_options["variables"],
+                    auth=self.auth,
                 )
             else:
                 self._order_vars = Variables(
-                    self._source,
-                    auth=self.auth,
                     product=self.product,
                     version=self._version,
+                    auth=self.auth,
                 )
 
         # I think this is where property setters come in, and one should be used here? Right now order_vars.avail is only filled in
@@ -722,17 +744,18 @@ def file_vars(self):
 
         Examples
         --------
         >>> reg_a = ipx.Query('ATL06',[-55, 68, -48, 71],['2019-02-20','2019-02-28']) # doctest: +SKIP
-
+
         >>> reg_a.file_vars # doctest: +SKIP
         """
 
         if not hasattr(self, "_file_vars"):
             if self._source == "file":
-                self._file_vars = Variables(self._source,
-                                            auth=self.auth,
-                                            product=self.product,
-                                            )
+                self._file_vars = Variables(
+                    auth=self.auth,
+                    product=self.product,
+                    version=self._version,
+                )
 
         return self._file_vars
@@ -815,6 +838,8 @@ def product_all_info(self):
 
     def latest_version(self):
         """
+        A wrapper around is2ref.latest_version.
+
         Determine the most recent version available for the given product.
 
         Examples
@@ -823,11 +848,7 @@
         >>> reg_a.latest_version() # doctest: +SKIP
         '006'
         """
-        if not hasattr(self, "_about_product"):
-            self._about_product = is2ref.about_product(self._prod)
-        return max(
-            [entry["version_id"] for entry in self._about_product["feed"]["entry"]]
-        )
+        return is2ref.latest_version(self.product)
 
     def show_custom_options(self, dictview=False):
         """
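[Editor's aside: the Coverage merging in the subsetparams hunk above is what guarantees subset orders still contain the orbit and ancillary variables icepyx needs downstream. A hedged sketch of the intended flow (assumes an active Earthdata session):

    import icepyx as ipx

    region = ipx.Query('ATL06', [-55, 68, -48, 71], ['2019-02-20', '2019-02-28'])
    region.order_vars.append(var_list=['h_li'])

    # subsetparams appends the required orbit_info/ancillary_data variables
    # to the user-supplied Coverage dict before the order is placed
    params = region.subsetparams(Coverage=region.order_vars.wanted)
]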
diff --git a/icepyx/core/read.py b/icepyx/core/read.py
index a85ee659b..842eab51f 100644
--- a/icepyx/core/read.py
+++ b/icepyx/core/read.py
@@ -320,10 +320,10 @@ class Read:
 
     # ----------------------------------------------------------------------
     # Constructors
-
+
     def __init__(
         self,
-        data_source=None,
+        data_source=None,  # DevNote: Make this a required arg when catalog is removed
         product=None,
         filename_pattern=None,
         catalog=None,
@@ -336,10 +336,9 @@ def __init__(
             "The `catalog` argument has been deprecated and intake is no longer supported. "
             "Please use the `data_source` argument to specify your dataset instead."
         )
-
+
         if data_source is None:
             raise ValueError("data_source is a required argument")
-
         # Raise warnings for deprecated arguments
         if filename_pattern:
             warnings.warn(
@@ -380,7 +379,7 @@ def __init__(
         # Create a dictionary of the products as read from the metadata
         product_dict = {}
         for file_ in self._filelist:
-            product_dict[file_] = self._extract_product(file_)
+            product_dict[file_] = is2ref.extract_product(file_)
 
         # Raise warnings or errors for multiple products or products not matching the user-specified product
         all_products = list(set(product_dict.values()))
@@ -456,12 +455,9 @@ def vars(self):
         """
 
         if not hasattr(self, "_read_vars"):
-            self._read_vars = Variables(
-                "file", path=self.filelist[0], product=self.product
-            )
-
+            self._read_vars = Variables(path=self.filelist[0])
         return self._read_vars
-
+
     @property
     def filelist(self):
         """
@@ -478,22 +474,6 @@ def product(self):
 
     # ----------------------------------------------------------------------
     # Methods
-
-    @staticmethod
-    def _extract_product(filepath):
-        """
-        Read the product type from the metadata of the file. Return the product as a string.
-        """
-        with h5py.File(filepath, "r") as f:
-            try:
-                product = f.attrs["short_name"].decode()
-                product = is2ref._validate_product(product)
-            except KeyError:
-                raise AttributeError(
-                    f"Unable to extract the product name from file metadata."
-                )
-        return product
-
     @staticmethod
     def _check_source_for_pattern(source, filename_pattern):
         """
@@ -742,8 +722,33 @@ def load(self):
         # so to get a combined dataset, we need to keep track of spots under the hood, open each group, and then combine them into one xarray where the spots are IDed somehow (or only the strong ones are returned)
         # this means we need to get/track from each dataset we open some of the metadata, which we include as mandatory variables when constructing the wanted list
 
+        if not self.vars.wanted:
+            raise AttributeError(
+                'No variables listed in self.vars.wanted. Please use the Variables class '
+                'via self.vars to search for desired variables to read and self.vars.append(...) '
+                'to add variables to the wanted variables list.'
+            )
+
+        # Append the minimum variables needed for icepyx to merge the datasets
+        # Skip products which do not contain required variables
+        if self.product not in ['ATL14', 'ATL15', 'ATL23']:
+            var_list=[
+                "sc_orient",
+                "atlas_sdp_gps_epoch",
+                "cycle_number",
+                "rgt",
+                "data_start_utc",
+                "data_end_utc",
+            ]
+
+            # Adjust the var_list for individual products
+            if self.product == "ATL11":
+                var_list.remove("sc_orient")
+
+            self.vars.append(defaults=False, var_list=var_list)
+
         try:
-            groups_list = list_of_dict_vals(self._read_vars.wanted)
+            groups_list = list_of_dict_vals(self.vars.wanted)
         except AttributeError:
             pass
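[Editor's aside: taken together, the read-side changes above imply a workflow like the following (a hedged sketch; the data directory is hypothetical). load() now refuses to run with an empty wanted list and appends its own required merge variables after the user's:

    import icepyx as ipx

    reader = ipx.Read(data_source='/path/to/granules')

    # product and version are now read from each file's metadata via
    # is2ref.extract_product / is2ref.extract_version
    reader.vars.append(var_list=['h_li', 'latitude', 'longitude'])

    # raises AttributeError if reader.vars.wanted is still empty, then
    # quietly appends sc_orient, rgt, cycle_number, etc. before reading
    ds = reader.load()
]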
diff --git a/icepyx/core/variables.py b/icepyx/core/variables.py
index d46561f46..94645ca94 100644
--- a/icepyx/core/variables.py
+++ b/icepyx/core/variables.py
@@ -1,9 +1,13 @@
 import numpy as np
 import os
 import pprint
+import warnings
 
 from icepyx.core.auth import EarthdataAuthMixin
 import icepyx.core.is2ref as is2ref
+from icepyx.core.exceptions import DeprecationError
+import icepyx.core.validate_inputs as val
+import icepyx.core as ipxc
 
 # DEVGOAL: use h5py to simplify some of these tasks, if possible!
@@ -25,11 +29,21 @@ class Variables(EarthdataAuthMixin):
     contained in ICESat-2 products.
 
     Parameters
-    ----------
+    ----------
     vartype : string
+        This argument is deprecated. The variable source is now inferred from the path or product arguments.
         One of ['order', 'file'] to indicate the source of the input variables.
         This field will be auto-populated when a variable object is created as an
         attribute of a query object.
+    path : string, default None
+        The path to a local ICESat-2 file. The variables list will contain the variables
+        present in this file. Either path or product are required input arguments.
+    product : string, default None
+        Properly formatted string specifying a valid ICESat-2 product. The variables list will
+        contain all available variables for this product. Either product or path are required
+        input arguments.
+    version : string, default None
+        Properly formatted string specifying a valid version of the ICESat-2 product.
     avail : dictionary, default None
         Dictionary (key:values) of available variable names (keys) and paths (values).
     wanted : dictionary, default None
@@ -38,47 +52,72 @@
         A session object authenticating the user to download data using their
         Earthdata login information. The session object will automatically be passed
         from the query object if you have successfully logged in there.
-    product : string, default None
-        Properly formatted string specifying a valid ICESat-2 product
-    version : string, default None
-        Properly formatted string specifying a valid version of the ICESat-2 product
-    path : string, default None
-        For vartype file, a path to a directory of or single input data file (not yet implemented)
+
     """
 
     def __init__(
         self,
-        vartype,
-        avail=None,
-        wanted=None,
+        vartype=None,
+        path=None,
         product=None,
         version=None,
-        path=None,
+        avail=None,
+        wanted=None,
         auth=None,
     ):
-
-        assert vartype in ["order", "file"], "Please submit a valid variables type flag"
+        # Deprecation error
+        if vartype in ['order', 'file']:
+            raise DeprecationError(
+                'It is no longer required to specify the variable type `vartype`. Instead please '
+                'provide either the path to a local file (arg: `path`) or the product you would '
+                'like variables for (arg: `product`).'
+            )
+
+        if path and product:
+            raise TypeError(
+                'Please provide either a filepath or a product. If a filepath is provided '
+                'variables will be read from the file. If a product is provided all available '
+                'variables for that product will be returned.'
+            )
+
+        # Set the product and version from either the input args or the file
+        if path:
+            self._path = path
+            self._product = is2ref.extract_product(self._path)
+            self._version = is2ref.extract_version(self._path)
+        elif product:
+            # Check for valid product string
+            self._product = is2ref._validate_product(product)
+            # Check for valid version string
+            # If version is not specified by the user assume the most recent version
+            self._version = val.prod_version(is2ref.latest_version(self._product), version)
+        else:
+            raise TypeError('Either a filepath or a product needs to be given as an input argument.')
 
         # initialize authentication properties
         EarthdataAuthMixin.__init__(self, auth=auth)
 
-        self._vartype = vartype
-        self.product = product
         self._avail = avail
         self.wanted = wanted
 
         # DevGoal: put some more/robust checks here to assess validity of inputs
-
-        if self._vartype == "order":
-            if self._avail == None:
-                self._version = version
-        elif self._vartype == "file":
-            # DevGoal: check that the list or string are valid dir/files
-            self.path = path
-
-        # @property
-        # def wanted(self):
-        #     return self._wanted
+
+    @property
+    def path(self):
+        # _path is only set when the object was built from a file
+        return getattr(self, '_path', None)
+
+    @property
+    def product(self):
+        return self._product
+
+    @property
+    def version(self):
+        return self._version
 
     def avail(self, options=False, internal=False):
         """
@@ -97,16 +136,14 @@ def avail(self, options=False, internal=False):
         .
         'quality_assessment/gt3r/signal_selection_source_fraction_3']
         """
-        # if hasattr(self, '_avail'):
-        #     return self._avail
-        # else:
+
         if not hasattr(self, "_avail") or self._avail == None:
-            if self._vartype == "order":
+            if self.path is None:
                 self._avail = is2ref._get_custom_options(
-                    self.session, self.product, self._version
+                    self.session, self.product, self.version
                 )["variables"]
-
-            elif self._vartype == "file":
+            else:
+                # If a path was given, use that file to read the variables
                 import h5py
 
                 self._avail = []
@@ -446,53 +483,14 @@ def append(self, defaults=False, var_list=None, beam_list=None, keyword_list=Non
             and keyword_list == None
         ), "You must enter parameters to add to a variable subset list. If you do not want to subset by variable, ensure your is2.subsetparams dictionary does not contain the key 'Coverage'."
-        req_vars = {}
+        final_vars = {}
 
-        # if not hasattr(self, 'avail') or self.avail==None: self.get_avail()
-        # vgrp, paths = self.parse_var_list(self.avail)
-        # allpaths = []
-        # [allpaths.extend(np.unique(np.array(paths[p]))) for p in range(len(paths))]
         vgrp, allpaths = self.avail(options=True, internal=True)
-
         self._check_valid_lists(vgrp, allpaths, var_list, beam_list, keyword_list)
 
-        # add the mandatory variables to the data object
-        if self._vartype == "order":
-            nec_varlist = [
-                "sc_orient",
-                "sc_orient_time",
-                "atlas_sdp_gps_epoch",
-                "data_start_utc",
-                "data_end_utc",
-                "granule_start_utc",
-                "granule_end_utc",
-                "start_delta_time",
-                "end_delta_time",
-            ]
-        elif self._vartype == "file":
-            nec_varlist = [
-                "sc_orient",
-                "atlas_sdp_gps_epoch",
-                "cycle_number",
-                "rgt",
-                "data_start_utc",
-                "data_end_utc",
-            ]
-
-        # Adjust the nec_varlist for individual products
-        if self.product == "ATL11":
-            nec_varlist.remove("sc_orient")
-
-        try:
-            self._check_valid_lists(vgrp, allpaths, var_list=nec_varlist)
-        except ValueError:
-            # Assume gridded product since user input lists were previously validated
-            nec_varlist = []
-
+        # Instantiate self.wanted to an empty dictionary if it doesn't exist
         if not hasattr(self, "wanted") or self.wanted == None:
-            for varid in nec_varlist:
-                req_vars[varid] = vgrp[varid]
-            self.wanted = req_vars
+            self.wanted = {}
 
         # DEVGOAL: add a secondary var list to include uncertainty/error information for lower level data if specific data variables have been specified...
 
@@ -501,21 +499,21 @@ def append(self, defaults=False, var_list=None, beam_list=None, keyword_list=Non
 
         # Case only variables (but not keywords or beams) are specified
         if beam_list == None and keyword_list == None:
-            req_vars.update(self._iter_vars(sum_varlist, req_vars, vgrp))
+            final_vars.update(self._iter_vars(sum_varlist, final_vars, vgrp))
 
         # Case a beam and/or keyword list is specified (with or without variables)
         else:
-            req_vars.update(
-                self._iter_paths(sum_varlist, req_vars, vgrp, beam_list, keyword_list)
+            final_vars.update(
+                self._iter_paths(sum_varlist, final_vars, vgrp, beam_list, keyword_list)
             )
 
         # update the data object variables
-        for vkey in req_vars.keys():
+        for vkey in final_vars.keys():
             # add all matching keys and paths for new variables
             if vkey not in self.wanted.keys():
-                self.wanted[vkey] = req_vars[vkey]
+                self.wanted[vkey] = final_vars[vkey]
             else:
-                for vpath in req_vars[vkey]:
+                for vpath in final_vars[vkey]:
                     if vpath not in self.wanted[vkey]:
                         self.wanted[vkey].append(vpath)

From 8958ad251e13b4aa6eb15582794f91906e63186d Mon Sep 17 00:00:00 2001
From: Jessica Scheick
Date: Mon, 16 Oct 2023 16:20:48 -0400
Subject: [PATCH 12/14] use swap_dims function to simplify dataset manipulation
 in read fn and remove xarray warning

---
 icepyx/core/read.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/icepyx/core/read.py b/icepyx/core/read.py
index 842eab51f..b5149ad59 100644
--- a/icepyx/core/read.py
+++ b/icepyx/core/read.py
@@ -279,7 +279,10 @@ class Read:
         String that shows the filename pattern as previously required for Intake's path_as_pattern argument.
         The default describes files downloaded directly from NSIDC (subsetted and non-subsetted) for most products (e.g. ATL06).
         The ATL11 filename pattern from NSIDC is: 'ATL{product:2}_{rgt:4}{orbitsegment:2}_{cycles:4}_{version:3}_{revision:2}.h5'.
+<<<<<<< HEAD
         **Deprecation warning:** This argument is no longer required and will be deprecated in version 1.0.0.
+=======
+>>>>>>> use swap_dims function to simplify dataset manipulation in read fn and remove xarray warning
     catalog : string, default None
         Full path to an Intake catalog for reading in data.
@@ -591,15 +594,20 @@ def _add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict):
                     .assign_coords(
                         {
                             spot_dim_name: (spot_dim_name, [spot]),
-                            "delta_time": ("delta_time", photon_ids),
+                            # "delta_time": ("delta_time", photon_ids),
+                            "photon_idx": ("delta_time", photon_ids),
                         }
                     )
                     .assign({spot_var_name: (("gran_idx", spot_dim_name), [[track_str]])})
-                    .rename_dims({"delta_time": "photon_idx"})
-                    .rename({"delta_time": "photon_idx"})
+                    .swap_dims({"delta_time": "photon_idx"})
+                    # .rename_dims({"delta_time": "photon_idx"})
+                    # .rename({"delta_time": "photon_idx"})
                     # .set_index("photon_idx")
                 )
 
+                # DEVNOTE: simplifying the above (to use swap_dims) appears to keep delta_time as a coordinate
+                # in that case, the else below is not needed, and the if would need testing
+
                 # handle cases where the delta time is 2d due to multiple cycles in that group
                 if spot_dim_name == "pair_track" and np.ndim(hold_delta_times) > 1:
                     ds = ds.assign_coords(
                         {"delta_time": (("photon_idx", "cycle_number"), hold_delta_times)}
                     )

From 2292f1151b154080e9d9d6fc2b6076d16a3e9e71 Mon Sep 17 00:00:00 2001
From: Jessica Scheick
Date: Thu, 2 Nov 2023 14:07:33 -0400
Subject: [PATCH 13/14] start playing with removing extra code

---
 icepyx/core/read.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/icepyx/core/read.py b/icepyx/core/read.py
index b5149ad59..88d8526ef 100644
--- a/icepyx/core/read.py
+++ b/icepyx/core/read.py
@@ -279,10 +279,7 @@ class Read:
         String that shows the filename pattern as previously required for Intake's path_as_pattern argument.
         The default describes files downloaded directly from NSIDC (subsetted and non-subsetted) for most products (e.g. ATL06).
         The ATL11 filename pattern from NSIDC is: 'ATL{product:2}_{rgt:4}{orbitsegment:2}_{cycles:4}_{version:3}_{revision:2}.h5'.
-<<<<<<< HEAD
         **Deprecation warning:** This argument is no longer required and will be deprecated in version 1.0.0.
-=======
->>>>>>> use swap_dims function to simplify dataset manipulation in read fn and remove xarray warning
     catalog : string, default None
         Full path to an Intake catalog for reading in data.
@@ -613,8 +610,8 @@ def _add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict):
                 ds = ds.assign_coords(
                     {"delta_time": (("photon_idx", "cycle_number"), hold_delta_times)}
                 )
-            else:
-                ds = ds.assign_coords({"delta_time": ("photon_idx", hold_delta_times)})
+            # else:
+            #     ds = ds.assign_coords({"delta_time": ("photon_idx", hold_delta_times)})
 
             # for ATL11
             if "ref_pt" in ds.coords:
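[Editor's aside: the DEVNOTE's claim above -- that swap_dims keeps delta_time around as a coordinate, making the commented-out else branch unnecessary -- can be checked with a minimal standalone xarray sketch (not part of these patches):

    import numpy as np
    import xarray as xr

    # toy stand-ins for the photon-rate variables handled in _add_vars_to_ds
    ds = xr.Dataset(
        {"h_li": ("delta_time", np.arange(4.0))},
        coords={"delta_time": np.linspace(0.0, 1.5, 4)},
    )
    ds = ds.assign_coords(photon_idx=("delta_time", np.arange(4)))

    swapped = ds.swap_dims({"delta_time": "photon_idx"})

    # photon_idx is now the dimension coordinate and delta_time survives as
    # a non-dimension coordinate, so no extra assign_coords is needed in 1-D
    assert "photon_idx" in swapped.dims
    assert "delta_time" in swapped.coords
]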
From 9bcfd09783240866ceca8b5bb38fc0b2c1dca5bb Mon Sep 17 00:00:00 2001
From: Jessica Scheick
Date: Mon, 20 Nov 2023 17:25:44 -0500
Subject: [PATCH 14/14] remove commented code

---
 icepyx/core/read.py | 29 ++++++++++-------------------
 1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/icepyx/core/read.py b/icepyx/core/read.py
index b13cb1679..e136a1d64 100644
--- a/icepyx/core/read.py
+++ b/icepyx/core/read.py
@@ -320,7 +320,7 @@ class Read:
 
     # ----------------------------------------------------------------------
     # Constructors
-
+
     def __init__(
         self,
         data_source=None,  # DevNote: Make this a required arg when catalog is removed
         product=None,
         filename_pattern=None,
         catalog=None,
@@ -336,7 +336,7 @@ def __init__(
             "The `catalog` argument has been deprecated and intake is no longer supported. "
             "Please use the `data_source` argument to specify your dataset instead."
         )
-
+
         if data_source is None:
             raise ValueError("data_source is a required argument")
         # Raise warnings for deprecated arguments
@@ -457,7 +457,7 @@ def vars(self):
 
         if not hasattr(self, "_read_vars"):
             self._read_vars = Variables(path=self.filelist[0])
         return self._read_vars
-
+
     @property
     def filelist(self):
         """
@@ -591,27 +591,18 @@ def _add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict):
                     .assign_coords(
                         {
                             spot_dim_name: (spot_dim_name, [spot]),
-                            # "delta_time": ("delta_time", photon_ids),
                             "photon_idx": ("delta_time", photon_ids),
                         }
                     )
                     .assign({spot_var_name: (("gran_idx", spot_dim_name), [[track_str]])})
                     .swap_dims({"delta_time": "photon_idx"})
-                    # .rename_dims({"delta_time": "photon_idx"})
-                    # .rename({"delta_time": "photon_idx"})
-                    # .set_index("photon_idx")
                 )
 
-                # DEVNOTE: simplifying the above (to use swap_dims) appears to keep delta_time as a coordinate
-                # in that case, the else below is not needed, and the if would need testing
-
                 # handle cases where the delta time is 2d due to multiple cycles in that group
                 if spot_dim_name == "pair_track" and np.ndim(hold_delta_times) > 1:
                     ds = ds.assign_coords(
                         {"delta_time": (("photon_idx", "cycle_number"), hold_delta_times)}
                     )
-                # else:
-                #     ds = ds.assign_coords({"delta_time": ("photon_idx", hold_delta_times)})
 
                 # for ATL11
                 if "ref_pt" in ds.coords:
@@ -726,15 +717,15 @@ def load(self):
 
         if not self.vars.wanted:
             raise AttributeError(
-                'No variables listed in self.vars.wanted. Please use the Variables class '
-                'via self.vars to search for desired variables to read and self.vars.append(...) '
-                'to add variables to the wanted variables list.'
+                "No variables listed in self.vars.wanted. Please use the Variables class "
+                "via self.vars to search for desired variables to read and self.vars.append(...) "
+                "to add variables to the wanted variables list."
             )
-
+
         # Append the minimum variables needed for icepyx to merge the datasets
         # Skip products which do not contain required variables
-        if self.product not in ['ATL14', 'ATL15', 'ATL23']:
-            var_list=[
+        if self.product not in ["ATL14", "ATL15", "ATL23"]:
+            var_list = [
                 "sc_orient",
                 "atlas_sdp_gps_epoch",
                 "cycle_number",
                 "rgt",
                 "data_start_utc",
                 "data_end_utc",
             ]
 
             # Adjust the var_list for individual products
             if self.product == "ATL11":
                 var_list.remove("sc_orient")
 
             self.vars.append(defaults=False, var_list=var_list)
-
+
         try:
             groups_list = list_of_dict_vals(self.vars.wanted)
         except AttributeError:
             pass