From 7ecccda385b1c4d85956af5c26e9a62739485f62 Mon Sep 17 00:00:00 2001
From: "eli@water.ca.gov"
Date: Tue, 16 Jul 2024 20:02:35 -0700
Subject: [PATCH] Fixed command line version of archive_ts and made it
 installable as a CLI

---
 schimpy/archive_ts.py | 110 ++++++++++++++++++++++++++++--------------
 setup.py              |   1 +
 2 files changed, 74 insertions(+), 37 deletions(-)

diff --git a/schimpy/archive_ts.py b/schimpy/archive_ts.py
index 0b17587..58d08c2 100755
--- a/schimpy/archive_ts.py
+++ b/schimpy/archive_ts.py
@@ -12,7 +12,7 @@
 """
 from schimpy.station import *
-#from schimpy.paam import *
+from schimpy.param import *
 import pandas as pd
 import os
 import yaml
@@ -28,14 +28,28 @@ def create_arg_parser():
     # Read in the input file
     parser = argparse.ArgumentParser(
-        description="""
-    Archive time series from one simulation in a large with many alternatives
-    and store in a common location with better names and formats.
-
-    Usage:
-    archive_ts --rundir mycase --ardir ../arnchive --label mycase --scenario_data={exp:1000,sjr:1400}"
-
-    """)
+        description=
+        """
+        Archive time series from one simulation in a large study with many alternatives
+        and store in a common location with better names and formats.
+        """,
+        epilog=
+        """
+        Usage:\n
+
+        $ archive_ts --ardir archive_ts --rundir mss_base --label base
+          --scenario_data '{year: 2001, sjr: 1400, exports: 1500}'
+          --extracted '{flux.out: flux}' --stationfile fluxflag.prop --run_start 2021-01-01
+
+        $ archive_ts --ardir archive_ts --rundir mss_base --label base
+          --scenario_data '{year: 2001, sjr: 1400, exports: 1500}' \n
+
+        $ archive_ts --ardir archive_ts --rundir mss_base --label base
+          --scenario_data '{year: 2001, sjr: 1400, exports: 1500}'
+          --stationfile outputs/south_delta.bp
+          --extracted '{fracsjr_*.out: fracsjr, fracdelta_*.out: fracdelta}'
+          --time_sharded
+        """, formatter_class=argparse.RawDescriptionHelpFormatter)
     parser.add_argument('--rundir', default=None,
                         help='location of the run launch dir where param.nml resides.')
     parser.add_argument('--ardir', default=None,
@@ -44,19 +58,23 @@ def create_arg_parser():
                         help='scenario label for output. Final name inarchive will be variable_label.csv')
     parser.add_argument('--scenario_data', default={},type=yaml.safe_load,
                         help='dictionary of data that identifies scenario, which will be added as a column to the output. This is very helpful for stitching. ')
-    parser.add_argument('--stationfile',default='station.in',type=yaml.safe_load,help='name of annotated station.in or build pointfile')
+    parser.add_argument('--stationfile',default='station.in',type=yaml.safe_load,help='name of annotated station.in, build pointfile or fluxflag.prop/flow_xsects.yaml')

-    parser.add_argument('--extracted',default={"staout_1": "elev", "staout_5": "temp", "staout_6": "salt"},type=yaml.safe_load,
-                        help='dictionary of extracted data in rundir/outputs in the form of "file_name: variable_name". '
-                        'You can only one archive atation.in plus flux.out OR the files associated with a single build point on one invocation.'
-                        'On the command line note that this is quoted and requires a space after the colons')
+    parser.add_argument('--extracted',default=None,type=yaml.safe_load,
+                        help='dictionary of extracted data in rundir/outputs in the form of "file_name: variable_name". '
+                        'You can archive only one station.in OR flux.out OR the files associated with a single build point per invocation. '
+                        'On the command line note that this is quoted and requires a space after each colon. '
+                        'Example: "{staout_1: elev, staout_5: temp, staout_6: salt}"')

-    parser.add_argument('--run_start',default=None,type=str,
-                        help='start date of run. If omitted, will be parsed from param.nml')
+    parser.add_argument('--run_start',default=None,type=pd.to_datetime,
+                        help='start date of run. If omitted, it will be inferred from param.nml.')
+
+    parser.add_argument('--time_sharded',action='store_true',
+                        help='if set, assume the extracted files are sharded in time.')
     return parser

-def archive_time_series(rundir, ardir,scenario_label=None,scenario_data={},staouts={},stationfile='station.in'):
+def archive_time_series(rundir, ardir, runstart, scenario_label=None, scenario_data={}, staouts={}, stationfile='station.in', time_sharded=False):
     """ Archive time series from rundir/outputs to ardir
     rundir/param.nml must point to a file with the correct run start
@@ -67,15 +85,16 @@
     if scenario_label is None:
         raise ValueError("Scenario label must be provided")
-    runstart = pd.Timestamp(2021,1,1)
-    do_flux = (stationfile == 'station.in')
+    do_flux = "flow" in stationfile or "flux" in stationfile
     if do_flux:
-        archive_flux(rundir,ardir,scenario_label,scenario_data,runstart)
-        archive_staout(rundir,ardir,scenario_label,scenario_data,runstart)
-        statouts={"fort.18": "sjrfrac"}
-        staouts={"sjrfrac.dat": "sjrfrac"}
-        #archive_staout(rundir,ardir,scenario_label,scenario_data,runstart,staouts,stationfile="outputs/south_delta.bp",time_unit='d')
+        archive_flux(rundir,ardir,scenario_label,stationfile,scenario_data,runstart=runstart)
+    else:
+        tunit = 'd' if 'bp' in stationfile else 's'
+        archive_staout(rundir,ardir,scenario_label,scenario_data,staouts,stationfile,time_unit=tunit,runstart=runstart,time_sharded=time_sharded)

 def shard_number(x):
     a0 = os.path.splitext(x)[0]
@@ -95,14 +114,22 @@ def get_ordered_files(loc,pat,time_sharded):
     all_files.sort(key=shard_number)
     return all_files

-def archive_staout(rundir,ardir,scenario_label,scenario_data,runstart,
+def infer_runstart(rundir):
+    """ Infer runstart based on param.nml in directory rundir """
+    paramfile = os.path.join(rundir,"param.nml")
+    p = read_params(paramfile)
+    return p.get_run_start()
+
+def archive_staout(rundir,ardir,scenario_label,scenario_data,
                    staouts=None,stationfile="station.in",float_format="%.3f",
-                   time_unit='s',multi=False,elim_default=True,do_flux=False,time_sharded=False):
+                   time_unit='s',multi=False,elim_default=True,do_flux=False,time_sharded=False,runstart=None):
+    if runstart is None:
+        runstart = infer_runstart(rundir)
     if staouts is None: staouts = {"staout_1": "elev","staout_5": "temp","staout_6":"salt"}
     if time_sharded: raise ValueError("Time sharding unexected for staout_* files")
     for s in staouts:
-        print(f"Processing {s} in directory {rundir}")
+        print(f"Processing {s} in run directory {rundir}, time_sharded={time_sharded}")
         loc = os.path.join(rundir,"outputs")
         ofiles = get_ordered_files(loc,s,time_sharded)
         dfs = []  # For concatenation in time in case there are more than one
         if len(ofiles) == 0:
             print(f"No files found for pattern {s}")
             continue
-        for fpath in ofiles:
-            df = read_staout(fpath,os.path.join(rundir,stationfile),reftime=runstart,time_unit=time_unit,multi=multi,elim_default=elim_default)
+        for fpath in ofiles:
+            df = read_staout(fpath,station_infile=os.path.join(rundir,stationfile),reftime=runstart,time_unit=time_unit,multi=multi,elim_default=elim_default)
             #df.pivot()
             for item in scenario_data:
                 df[item] = scenario_data[item]
@@ -122,13 +149,15 @@
         dfout = pd.concat(dfs,axis=0)
         scenario_fname = f"{varlabel}_{scenario_label}.csv"
         outfpath = os.path.join(ardir,scenario_fname)
-        dfout.to_csv(outfpath,sep=",",date_format="%Y-%m-%dT%H:%M",float_format=float_format)
+        dfout.to_csv(outfpath,sep=",",date_format="%Y-%m-%dT%H:%M:%S",float_format=float_format)

-def archive_flux(rundir,ardir,scenario_label,scenario_data,runstart):
+def archive_flux(rundir,ardir,scenario_label,stationfile,scenario_data,runstart):
     print(f"Processing flux.out in directory {rundir}")
+    if runstart is None:
+        runstart = infer_runstart(rundir)
     fpath = os.path.join(rundir,"outputs","flux.out")
-    df = read_flux_out(os.path.join(rundir,"outputs","flux.out"),names=os.path.join(rundir,"fluxflag.prop"),reftime=runstart)
-    #df.pivot()
+    df = read_flux_out(os.path.join(rundir,"outputs","flux.out"),names=os.path.join(rundir,stationfile),reftime=runstart)
     df.index.name="datetime"
     for item in scenario_data:
         df[item] = scenario_data[item]
@@ -136,7 +165,7 @@
     varlabel = "flow"
     scenario_fname = f"{varlabel}_{scenario_label}.csv"
     outfpath = os.path.join(ardir,scenario_fname)
-    df.to_csv(outfpath,sep=",",date_format="%Y-%m-%dT%H%M")
+    df.to_csv(outfpath,sep=",",date_format="%Y-%m-%dT%H:%M:%S")

 def process_extracted_scalar(rundir,ardir,extract_data_file,
                              variable, model_start, station_file, output_file):
@@ -191,13 +220,19 @@ def main():
     rundir = args.rundir
     ardir = args.ardir
     label = args.label
+    stationfile = args.stationfile
     scenario_data = args.scenario_data
     scenario_data=args.scenario_data
     staouts = args.extracted
+    time_sharded = args.time_sharded
+    runstart = args.run_start
+
     for key,val in scenario_data.items():
         if val is None:
             raise ValueError("Value in scenario_data is None. On the command line this may be an omitted space after colon")
     if not staouts:
         staouts = {"staout_1": "elev", "staout_5": "temp", "staout_6":"salt"}
     for key,val in staouts.items():
         if val is None:

     archive_time_series(rundir, ardir,
                         scenario_label=label,
-                        scenario_data={},
-                        staouts={},
-                        stationfile='station.in'
-                        )
+                        scenario_data=scenario_data,
+                        staouts=staouts,
+                        stationfile=stationfile,
+                        time_sharded=time_sharded,
+                        runstart=runstart)
     print(rundir)
     print(ardir)

diff --git a/setup.py b/setup.py
index 82a615c..bd0d3ca 100644
--- a/setup.py
+++ b/setup.py
@@ -81,6 +81,7 @@
         "create_nudging=schimpy.nudging:main",
         "interpolate_structure=schimpy.interpolate_structure:main",
         "merge_th=schimpy.merge_th:main",
+        "archive_ts=schimpy.archive_ts:main",
     ],
 },
 install_requires=requirements,
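Note: a minimal sketch of driving the revised archive_time_series directly from
Python rather than through the console script, assuming this patch is installed.
The run directory "mss_base", archive directory "archive_ts", and the scenario
values are hypothetical, echoing the epilog examples above:

    from schimpy.archive_ts import archive_time_series

    # Archives staout_* series from mss_base/outputs into archive_ts/,
    # writing files such as elev_base.csv with the year/sjr/exports values
    # appended as columns for later stitching across scenarios.
    archive_time_series(
        "mss_base",                # rundir containing param.nml and outputs/
        "archive_ts",              # ardir where variable_label.csv files land
        runstart=None,             # None -> inferred from param.nml via infer_runstart
        scenario_label="base",
        scenario_data={"year": 2001, "sjr": 1400, "exports": 1500},
        staouts={"staout_1": "elev", "staout_5": "temp", "staout_6": "salt"},
        stationfile="station.in",  # no "flow"/"flux" in the name, so the staout branch runs
        time_sharded=False,
    )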