Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix/passing adj dir to l3 processing plus attribute fix #292

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/process_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
mkdir $GITHUB_WORKSPACE/out/L0toL2/
mkdir $GITHUB_WORKSPACE/data_issues
for i in $(echo ${{ env.TEST_STATION }} | tr ' ' '\n'); do
python3 $GITHUB_WORKSPACE/main/src/pypromice/process/get_l2.py -c $GITHUB_WORKSPACE/aws-l0/tx/config/$i.toml -i $GITHUB_WORKSPACE/aws-l0/tx --issues $GITHUB_WORKSPACE/data_issues -o $GITHUB_WORKSPACE/out/L0toL2/
python3 $GITHUB_WORKSPACE/main/src/pypromice/process/get_l2.py -c $GITHUB_WORKSPACE/aws-l0/tx/config/$i.toml -i $GITHUB_WORKSPACE/aws-l0/tx --issues $GITHUB_WORKSPACE/data_issues -o $GITHUB_WORKSPACE/out/L0toL2/ --data_issues_path $GITHUB_WORKSPACE/data_issues
done
- name: Run L2 to L3 processing
env:
Expand All @@ -50,7 +50,7 @@ jobs:
mkdir $GITHUB_WORKSPACE/out/L2toL3/
for i in $(echo ${{ env.TEST_STATION }} | tr ' ' '\n'); do
echo ${i}_hour.nc
python3 $GITHUB_WORKSPACE/main/src/pypromice/process/get_l2tol3.py -c $GITHUB_WORKSPACE/aws-l0/metadata/station_configurations/ -i $GITHUB_WORKSPACE/out/L0toL2/${i}/${i}_hour.nc -o $GITHUB_WORKSPACE/out/L2toL3/
python3 $GITHUB_WORKSPACE/main/src/pypromice/process/get_l2tol3.py -c $GITHUB_WORKSPACE/aws-l0/metadata/station_configurations/ -i $GITHUB_WORKSPACE/out/L0toL2/${i}/${i}_hour.nc -o $GITHUB_WORKSPACE/out/L2toL3/ --data_issues_path $GITHUB_WORKSPACE/data_issues
done
- name: Upload test output
uses: actions/upload-artifact@v3
Expand Down
12 changes: 8 additions & 4 deletions src/pypromice/process/L2toL3.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,15 @@
from sklearn.linear_model import LinearRegression
from pypromice.qc.github_data_issues import adjustData
from scipy.interpolate import interp1d
from pathlib import Path
import logging

logger = logging.getLogger(__name__)

def toL3(L2, station_config={}, T_0=273.15):
def toL3(L2,
data_adjustments_dir: Path,
station_config={},
T_0=273.15):
'''Process one Level 2 (L2) product to Level 3 (L3) meaning calculating all
derived variables:
- Turbulent fluxes
Expand Down Expand Up @@ -109,7 +113,7 @@ def toL3(L2, station_config={}, T_0=273.15):

# processing continuous surface height, ice surface height, snow height
try:
ds = process_surface_height(ds, station_config)
ds = process_surface_height(ds, data_adjustments_dir, station_config)
except Exception as e:
logger.error("Error processing surface height at %s"%L2.attrs['station_id'])
logging.error(e, exc_info=True)
Expand All @@ -130,7 +134,7 @@ def toL3(L2, station_config={}, T_0=273.15):
return ds


def process_surface_height(ds, station_config={}):
def process_surface_height(ds, data_adjustments_dir, station_config={}):
"""
Process surface height data for different site types and create
surface height variables.
Expand Down Expand Up @@ -180,7 +184,7 @@ def process_surface_height(ds, station_config={}):
ds.z_boom_l.sel(time=first_valid_index) - ds['z_boom_l'])

# Adjust data for the created surface height variables
ds = adjustData(ds, var_list=['z_surf_1', 'z_surf_2', 'z_ice_surf'])
ds = adjustData(ds, data_adjustments_dir, var_list=['z_surf_1', 'z_surf_2', 'z_ice_surf'])

# Convert to dataframe and combine surface height variables
df_in = ds[[v for v in ['z_surf_1', 'z_surf_2', 'z_ice_surf'] if v in ds.data_vars]].to_dataframe()
Expand Down
2 changes: 1 addition & 1 deletion src/pypromice/process/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def getL3(self):
"""Perform L2 to L3 data processing, including resampling and metadata
and attribute population"""
logger.info("Level 3 processing...")
self.L3 = toL3(self.L2)
self.L3 = toL3(self.L2, data_adjustments_dir=self.data_issues_repository / "adjustments")

def writeArr(self, dataset, outpath, t=None):
"""Write L3 data to .nc and .csv hourly and daily files
Expand Down
31 changes: 20 additions & 11 deletions src/pypromice/process/get_l2.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,27 @@ def get_l2(config_file, inpath, outpath, variables, metadata, data_issues_path:
# Define input path
station_name = config_file.split('/')[-1].split('.')[0]
station_path = os.path.join(inpath, station_name)

# checking that data_issues_path is valid
if data_issues_path is None:
data_issues_path = Path("../PROMICE-AWS-data-issues")
ladsmund marked this conversation as resolved.
Show resolved Hide resolved
if data_issues_path.exists():
logging.warning(f"data_issues_path is missing. Using default data issues path: {data_issues_path}")
else:
raise ValueError("data_issues_path is missing. Please provide a valid path to the data issues repository")

if os.path.exists(station_path):
aws = AWS(config_file, station_path, data_issues_repository=data_issues_path, var_file=variables, meta_file=metadata)
aws = AWS(config_file,
station_path,
data_issues_repository=data_issues_path,
var_file=variables,
meta_file=metadata)
else:
aws = AWS(config_file, inpath, data_issues_repository=data_issues_path, var_file=variables, meta_file=metadata)
aws = AWS(config_file,
inpath,
data_issues_repository=data_issues_path,
var_file=variables,
meta_file=metadata)

# Perform level 1 and 2 processing
aws.getL1()
Expand All @@ -58,21 +75,13 @@ def main():
stream=sys.stdout,
)

data_issues_path = args.data_issues_path
if data_issues_path is None:
data_issues_path = Path("../PROMICE-AWS-data-issues")
if data_issues_path.exists():
logging.warning(f"data_issues_path is missing. Using default data issues path: {data_issues_path}")
else:
raise ValueError(f"data_issues_path is missing. Please provide a valid path to the data issues repository")

_ = get_l2(
args.config_file,
args.inpath,
args.outpath,
args.variables,
args.metadata,
data_issues_path=data_issues_path,
args.data_issues_path,
)


Expand Down
30 changes: 26 additions & 4 deletions src/pypromice/process/get_l2tol3.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,13 @@ def parse_arguments_l2tol3(debug_args=None):
required=False, help='File path to variables look-up table')
parser.add_argument('-m', '--metadata', default=None, type=str,
required=False, help='File path to metadata')
parser.add_argument('--data_issues_path', '--issues', default=None, help="Path to data issues repository")


args = parser.parse_args(args=debug_args)
return args

def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata):
def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, data_issues_path: Path|str):
if isinstance(config_folder, str):
config_folder = Path(config_folder)

Expand Down Expand Up @@ -68,9 +70,21 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata):
"project": "PROMICE",
"location_type": "ice sheet",
}


    # checking that the adjustment directory is properly given
if data_issues_path is None:
data_issues_path = Path("../PROMICE-AWS-data-issues")
if data_issues_path.exists():
logging.warning(f"data_issues_path is missing. Using default data issues path: {data_issues_path}")
else:
raise ValueError("data_issues_path is missing. Please provide a valid path to the data issues repository")
else:
data_issues_path = Path(data_issues_path)

data_adjustments_dir = data_issues_path / "adjustments"

# Perform Level 3 processing
l3 = toL3(l2, station_config)
l3 = toL3(l2, data_adjustments_dir, station_config)

# Write Level 3 dataset to file if output directory given
v = pypromice.resources.load_variables(variables)
Expand All @@ -83,7 +97,15 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata):

def main():
args = parse_arguments_l2tol3()
_ = get_l2tol3(args.config_folder, args.inpath, args.outpath, args.variables, args.metadata)



_ = get_l2tol3(args.config_folder,
args.inpath,
args.outpath,
args.variables,
args.metadata,
args.data_issues_path)

if __name__ == "__main__":
main()
21 changes: 14 additions & 7 deletions src/pypromice/process/join_l3.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,9 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me
l3_merged.z_ice_surf.to_series(), l3.z_ice_surf.to_series()
),
)


# saves attributes
attrs = l3_merged.attrs
# merging by time block
l3_merged = xr.concat(
(
Expand All @@ -504,6 +506,9 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me
),
dim="time",
)

    # restoring attributes
l3_merged.attrs = attrs

# Assign site id
if not l3_merged:
Expand All @@ -519,13 +524,15 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me
site_config_source_hash=get_commit_hash_and_check_dirty(config_folder),
gcnet_source_hash=get_commit_hash_and_check_dirty(folder_gcnet),
)

for stid, station_attributes in l3_merged.attrs["stations_attributes"].items():
station_source = json.loads(station_attributes["source"])
for k, v in station_source.items():
if k in site_source and site_source[k] != v:
site_source[k] = "multiple"
else:
site_source[k] = v
if "source" in station_attributes.keys():
station_source = json.loads(station_attributes["source"])
for k, v in station_source.items():
if k in site_source and site_source[k] != v:
site_source[k] = "multiple"
else:
site_source[k] = v
l3_merged.attrs["source"] = json.dumps(site_source)

v = pypromice.resources.load_variables(variables)
Expand Down
2 changes: 1 addition & 1 deletion src/pypromice/qc/github_data_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ def _getDF(flag_file):
).dropna(how='all', axis='rows')
else:
df=None
logger.info(f"No {flag_file.split('/')[-2][:-1]} file to read.")
logger.info(f"No {flag_file} file to read.")
return df


Expand Down
1 change: 1 addition & 0 deletions tests/e2e/test_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def test_full_e2e(self):
outpath=output_l3.as_posix(),
variables=None,
metadata=None,
data_issues_path=data_issues_path,
)

# Part 4 Join L3: Merge Current data and historical GC-Net and convert to site
Expand Down
Loading