
Commit

current status
rwegener2 committed Oct 23, 2023
1 parent b4d59d6 commit 3f4bfa7
Showing 3 changed files with 71 additions and 45 deletions.
24 changes: 24 additions & 0 deletions icepyx/core/is2ref.py
@@ -1,3 +1,4 @@
import h5py
import json
import numpy as np
import requests
@@ -333,3 +334,26 @@ def latest_version(product):
    return max(
        [entry["version_id"] for entry in _about_product["feed"]["entry"]]
    )

def extract_product(filepath):
    """
    Read the product type from the metadata of the file. Return the product as a string.
    """
    with h5py.File(filepath, 'r') as f:
        try:
            product = f.attrs['short_name'].decode()
            product = _validate_product(product)
        except KeyError:
            raise KeyError('Unable to parse the product name from file metadata')
    return product

def extract_version(filepath):
    """
    Read the version from the metadata of the file. Return the version as a string.
    """
    with h5py.File(filepath, 'r') as f:
        try:
            version = f['METADATA']['DatasetIdentification'].attrs['VersionID'].decode()
        except KeyError:
            raise KeyError('Unable to parse the version from file metadata')
    return version
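
For illustration, a minimal usage sketch of these two helpers against a local ICESat-2 granule; the file name below is hypothetical, and the only assumption is that the functions are importable from icepyx.core.is2ref as added above.

# Hypothetical usage sketch (not part of this commit).
from icepyx.core import is2ref

filepath = "./ATL06_20200702014158_01020810_006_01.h5"  # example granule name, assumed

product = is2ref.extract_product(filepath)  # e.g. "ATL06"
version = is2ref.extract_version(filepath)  # e.g. "006"
print(product, version)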
33 changes: 17 additions & 16 deletions icepyx/core/read.py
@@ -379,7 +379,7 @@ def __init__(
        # Create a dictionary of the products as read from the metadata
        product_dict = {}
        for file_ in self._filelist:
            product_dict[file_] = self._extract_product(file_)
            product_dict[file_] = is2ref.extract_product(file_)

        # Raise warnings or errors for multiple products or products not matching the user-specified product
        all_products = list(set(product_dict.values()))
@@ -472,21 +472,6 @@ def product(self):

    # ----------------------------------------------------------------------
    # Methods

    @staticmethod
    def _extract_product(filepath):
        """
        Read the product type from the metadata of the file. Return the product as a string.
        """
        with h5py.File(filepath, 'r') as f:
            try:
                product = f.attrs['short_name'].decode()
                product = is2ref._validate_product(product)
            # TODO test that this is the proper error
            except KeyError:
                raise 'Unable to parse the product name from file metadata'
        return product

    @staticmethod
    def _check_source_for_pattern(source, filename_pattern):
        """
@@ -735,6 +720,22 @@ def load(self):
        # so to get a combined dataset, we need to keep track of spots under the hood, open each group, and then combine them into one xarray where the spots are IDed somehow (or only the strong ones are returned)
        # this means we need to get/track from each dataset we open some of the metadata, which we include as mandatory variables when constructing the wanted list

        # Append the minimum variables needed for icepyx to merge the datasets
        var_list = [
            "sc_orient",
            "atlas_sdp_gps_epoch",
            "cycle_number",
            "rgt",
            "data_start_utc",
            "data_end_utc",
        ]

        # Adjust the variable list for individual products
        if self.product == "ATL11":
            var_list.remove("sc_orient")

        self._read_vars.append(defaults=False, var_list=var_list)
        print(self._read_vars)
        try:
            groups_list = list_of_dict_vals(self._read_vars.wanted)
        except AttributeError:
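As a standalone illustration of the per-product adjustment above, a small sketch of the same logic; the helper name is invented for the example (not icepyx API), and the only assumption is the one implied by the diff, namely that sc_orient is dropped for ATL11.

# Illustrative sketch (not icepyx API): minimum variables needed to merge datasets.
def minimum_merge_vars(product):
    var_list = [
        "sc_orient",
        "atlas_sdp_gps_epoch",
        "cycle_number",
        "rgt",
        "data_start_utc",
        "data_end_utc",
    ]
    # the diff removes sc_orient for ATL11, presumably because ATL11 granules lack it
    if product == "ATL11":
        var_list.remove("sc_orient")
    return var_list

print(minimum_merge_vars("ATL06"))  # full list
print(minimum_merge_vars("ATL11"))  # list without sc_orient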
59 changes: 30 additions & 29 deletions icepyx/core/variables.py
@@ -83,6 +83,9 @@ def __init__(
        # Set the product and version from either the input args or the file
        if path:
            self.path = path
            self.product = is2ref.extract_product(self.path)
            # TODO what is the best way to fill in the version here? --> read it from the file
            self.version = is2ref.extract_version(self.path)
        elif product:
            # Check for valid product string
            self.product = is2ref._validate_product(product)
@@ -475,42 +478,40 @@ def append(self, defaults=False, var_list=None, beam_list=None, keyword_list=None):
        self._check_valid_lists(vgrp, allpaths, var_list, beam_list, keyword_list)

        # add the mandatory variables to the data object
        # TODO QUESTION - why is this distinction made? How can we handle it wihtout vartype?
        if self._vartype == "order":
            nec_varlist = [
                "sc_orient",
                "sc_orient_time",
                "atlas_sdp_gps_epoch",
                "data_start_utc",
                "data_end_utc",
                "granule_start_utc",
                "granule_end_utc",
                "start_delta_time",
                "end_delta_time",
            ]
        elif self._vartype == "file":
            nec_varlist = [
                "sc_orient",
                "atlas_sdp_gps_epoch",
                "cycle_number",
                "rgt",
                "data_start_utc",
                "data_end_utc",
            ]

        # Adjust the nec_varlist for individual products
        if self.product == "ATL11":
            nec_varlist.remove("sc_orient")
        # TODO QUESTION - why is this distinction made? How can we handle it without vartype?
        # if self._vartype == "order":
        #     nec_varlist = [
        #         "sc_orient",
        #         "sc_orient_time",
        #         "atlas_sdp_gps_epoch",
        #         "data_start_utc",
        #         "data_end_utc",
        #         "granule_start_utc",
        #         "granule_end_utc",
        #         "start_delta_time",
        #         "end_delta_time",
        #     ]
        # elif self._vartype == "file":
        #     nec_varlist = [
        #         "sc_orient",
        #         "atlas_sdp_gps_epoch",
        #         "cycle_number",
        #         "rgt",
        #         "data_start_utc",
        #         "data_end_utc",
        #     ]

        try:
            self._check_valid_lists(vgrp, allpaths, var_list=nec_varlist)
            self._check_valid_lists(vgrp, allpaths)
        except ValueError:
            # Assume gridded product since user input lists were previously validated
            nec_varlist = []

        # TODO there is a lot of logic next that handles the required variables. Does it make
        # sense to even have required variables anymore, if we are handling those via query/read?
        if not hasattr(self, "wanted") or self.wanted == None:
            for varid in nec_varlist:
                req_vars[varid] = vgrp[varid]
            # for varid in nec_varlist:
            #     req_vars[varid] = vgrp[varid]
            self.wanted = req_vars

        # DEVGOAL: add a secondary var list to include uncertainty/error information for lower level data if specific data variables have been specified...
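With this change, a Variables object built from a local file should pick up both product and version from the file itself. A hedged sketch of that call, assuming the class is icepyx.core.variables.Variables and that it accepts the path keyword shown in the diff; the granule name is hypothetical.

# Hypothetical usage sketch: product and version are read from the file metadata.
from icepyx.core.variables import Variables

v = Variables(path="./ATL06_20200702014158_01020810_006_01.h5")  # example granule, assumed
print(v.product, v.version)  # e.g. "ATL06" and the version string stored in the file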
