Version 2.1 release

Version 2.1 official release, more documentation to follow.
NCAR · Sep 14, 2022 · 3b90625 · 3b90625
2 parents a442cc7 + 0a53597
commit 3b90625
Show file tree

Hide file tree

Showing 82 changed files with 69,968 additions and 2,204 deletions.
diff --git a/.github/scripts/icar_install_utils.sh b/.github/scripts/icar_install_utils.sh
@@ -74,21 +74,24 @@ function icar_dependencies {
     sudo apt-get update
     sudo apt-get install libcurl4-gnutls-dev
     sudo apt-get install libfftw3-dev
+    sudo apt-get install netcdf-bin
+    sudo apt-get install libnetcdff-dev
+
     # Installing HDF5 currently not working for NetCDF
     # sudo apt-get install libhdf5-dev libhdf5-openmpi-dev
 
     export CPPFLAGS="$CPPFLAGS -I${INSTALLDIR}/include"
     export LDFLAGS="$LDFLAGS -L${INSTALLDIR}/lib"
 
-    # Install szip (used by hdf5)
-    install_szip
-    # Install HDF5
-    install_hdf5
+    # # Install szip (used by hdf5)
+    # install_szip
+    # # Install HDF5
+    # install_hdf5
 
-    # Install NetCDF-C
-    install_netcdf_c
-    # Install NetCDF fortran
-    install_netcdf_fortran
+    # # Install NetCDF-C
+    # install_netcdf_c
+    # # Install NetCDF fortran
+    # install_netcdf_fortran
 
     # put installed bin directory in PATH
     export PATH=${INSTALLDIR}/bin:$PATH
@@ -148,7 +151,17 @@ function gen_test_run_data {
 function execute_test_run {
     cp ${GITHUB_WORKSPACE}/src/icar ${GITHUB_WORKSPACE}/tests/
     cd ${GITHUB_WORKSPACE}/tests
-    ./icar icar_options.nm
+    echo "Starting ICAR run"
+    ./icar icar_options.nml
+    time_dim=$(ncdump -v time icar_out_000001_2020-12-01_00-00-00.nc | grep "time = UNLIMITED" | sed 's/[^0-9]*//g')
+
+    if [[ ${time_dim} == "1" ]]; then
+	echo "FAILURE: ICAR output time dimension should not be equal to one, it was ${time_dim}"
+	exit 1
+    else
+	echo "SUCCESS: time dimension is equal to ${time_dim}"
+	exit 0
+    fi
 }
 
 function icar_after_success {

diff --git a/helpers/aggregate_parallel_files.py b/helpers/aggregate_parallel_files.py
@@ -76,7 +76,7 @@ def set_up_dataset(d):
             data = np.zeros((nt, nz, ny + y_off, nx + x_off))
 
         # print(name, data.shape, dims, attrs)
-        data_vars[v] = xr.DataArray(data.astype(np.float32), dims=dims, name=name, attrs=attrs)#, coords=coords)
+        data_vars[v] = xr.DataArray(data.astype(d[v].dtype), dims=dims, name=name, attrs=attrs)#, coords=coords)
 
     ds = xr.Dataset(data_vars, attrs=d.attrs)
     ds.encoding = d.encoding

diff --git a/helpers/batch_submit.sh → helpers/batch_submit_LSF.sh b/helpers/batch_submit.sh → helpers/batch_submit_LSF.sh
@@ -4,7 +4,7 @@
 #
 #BSUB -P P48500028           # project code
 #BSUB -W 12:00               # wall-clock time (hrs:mins)
-#BSUB -n 1                   # number of tasks in job         
+#BSUB -n 1                   # number of tasks in job
 #BSUB -R "span[ptile=16]"    # run 16 MPI tasks per node
 #BSUB -J            run_name            # job name
 #BSUB -o job_output/run_name.%J.out     # job output file (%J is replaced by the job ID)
@@ -24,7 +24,7 @@ PREFIX=run_name
 # it is useful to keep all other filenames relative to $PREFIX
 # note that this is not required anywhere though
 OUTDIR=$PREFIX
-OPTFILE=${PREFIX}_options.nml  
+OPTFILE=${PREFIX}_options.nml
 BATCHFILE=${PREFIX}_batch_submit.sh
 TEMPLATE=${PREFIX}_template.nml
 
@@ -58,17 +58,17 @@ if [[ ! -e ${PREFIX}_finished ]]; then
     if [[ -e ${PREFIX}_running ]]; then
         $SETUP_RUN $OPTFILE $TEMPLATE
     fi
-    
+
     # declare that we have run before so the next job will know
     touch ${PREFIX}_running
-    
+
     # run the actual executable (e.g. icar options.nml)
     $EXE $OPTFILE
     # typically the job will get killed while icar is running
-    # but for some reason bkilling the job still lets it touch _finished... 
+    # but for some reason bkilling the job still lets it touch _finished...
     # maybe this will give it a chance to really kill it first?
     sleep 10
-    
+
     # if icar completes, we are done, tell the next job that we finished
     touch ${PREFIX}_finished
 else

diff --git a/helpers/batch_submit_PBS.sh b/helpers/batch_submit_PBS.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+#
+
+### Job Name (will be used as prefix later on!)
+#PBS -N icar_batch_run
+### Project code
+#PBS -A P48500028
+#PBS -l walltime=00:15:00
+#PBS -q regular
+### job output file
+#PBS -o job_output/log.out
+### job error file (PBS will not allow use of ${PBS_JOBID} here? )
+#PBS -e job_output/log.err
+### Select 1 node with 36 CPUs for a total of 36 MPI processes
+#PBS -l select=1:ncpus=36:mpiprocs=36:ompthreads=1
+
+### PBS options for automation: https://gif.biotech.iastate.edu/submitting-dependency-jobs-using-pbs-torque
+
+# otherwise xarray is not available:
+module load conda/latest
+source /glade/u/apps/opt/conda/bin/activate
+
+# echo ${PBS_JOBID::7}
+
+# Set OpenMP variables
+export OMP_NUM_THREADS=1
+# export MP_TASK_AFFINITY=core:$OMP_NUM_THREADS
+
+# the easy way
+# icar icar_options.nml
+
+# the complex way (allows a continuous sequence of jobs)
+PREFIX=$PBS_JOBNAME
+
+# it is useful to keep all other filenames relative to $PREFIX
+# note that this is not required anywhere though
+OUTDIR=$PREFIX
+OPTFILE=${PREFIX}_options.nml
+BATCHFILE=${PREFIX}_batch_submit.sh
+TEMPLATE=${PREFIX}_template.nml
+
+# specify the location of the icar executable to use:
+EXE=${HOME}/bin/icar
+
+# various useful helper scripts (SETUP_RUN is critical)
+SETUP_RUN=${HOME}/icar/helpers/setup_next_run.py
+MAKE_TEMPLATE=${HOME}/icar/helpers/make_template.py
+MKOUTDIR=mkdir #<user_defined_path>/mkscratch.py # mkscratch creates the directory on scratch and links to it
+
+
+# --------------------------------------------------
+# SHOULD NOT NEED TO MODIFY ANYTHING BELOW THIS LINE
+# --------------------------------------------------
+
+# if the template file doesn't exist yet, make it
+if [[ ! -e $TEMPLATE ]]; then
+    $MAKE_TEMPLATE $OPTFILE $TEMPLATE > job_output/py_mktemp.out
+fi
+
+# if the output directory doesn't exist, create it
+if [[ ! -e $OUTDIR ]]; then
+    $MKOUTDIR $OUTDIR
+fi
+
+# if we didn't finish yet we have to continue -BK: but we print this in line 87, so 2 jobs max?
+if [[ ! -e ${PREFIX}_finished ]]; then
+    # first submit the next job dependent on this one
+    qsub -W depend=afterany:${PBS_JOBID} ${BATCHFILE}
+
+    # if we have run before, setup the appropriate restart options
+    if [[ -e ${PREFIX}_running ]]; then
+        # echo "setting up next run (setup_next_run.py)"
+        $SETUP_RUN $OPTFILE $TEMPLATE > job_output/py_setup.out
+    fi
+
+    # declare that we have run before so the next job will know
+    touch ${PREFIX}_running
+
+    # run the actual executable (e.g. icar options.nml)
+    cafrun -n 36 $EXE $OPTFILE >> job_output/icar${PBS_JOBID::7}.out
+    # typically the job will get killed while icar is running
+    # but for some reason bkilling the job still lets it touch _finished...
+    # maybe this will give it a chance to really kill it first?
+    sleep 20
+
+    # if icar completes, we are done, tell the next job that we finished
+    touch ${PREFIX}_finished
+else
+    # if the last job ran to completion, delete the inter-job communication files and exit
+    rm ${PREFIX}_running
+    rm ${PREFIX}_finished
+fi
diff --git a/helpers/batch_submit_SLURM.sh b/helpers/batch_submit_SLURM.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+### Job Name (will be used as prefix later on!)
+#SBATCH --job-name="ICAR_tst"
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=32
+#SBATCH --time=00:05:00
+#SBATCH --constraint=haswell
+#SBATCH --qos=debug
+### Project code
+#SBATCH --account=m4062
+### error and output files in separate folder, named with job name (%x) and job ID (%j)
+### N.B: create the job_output folder before submitting this job!
+#SBATCH --output=job_output/log-%x.%j.out
+#SBATCH --error=job_output/log-%x.%j.err
+
+# Make sure a python environment with xarray is available:
+module load python
+conda activate myenv
+
+# Set OpenMP variables
+export OMP_NUM_THREADS=1
+# export MP_TASK_AFFINITY=core:$OMP_NUM_THREADS
+
+# the easy way
+# icar icar_options.nml
+
+# the complex way (allows a continuous sequence of jobs)
+PREFIX=tst  ##$SBATCH_JOB_NAME
+
+# it is useful to keep all other filenames relative to $PREFIX
+# note that this is not required anywhere though
+OUTDIR=$PREFIX
+OPTFILE=options.nml  #${PREFIX}_options.nml
+BATCHFILE=batch_submit_SLURM.sh #${PREFIX}_batch_submit.sh
+TEMPLATE=${PREFIX}_template.nml
+
+# the ICAR executable to use
+EXE=$HOME/bin/icar_dbs
+
+# load any environmental settings to run icar properly (system dependent):
+. /global/cfs/cdirs/m4062/env_scripts/UO-GNU-env.sh
+
+
+# various useful helper scripts (SETUP_RUN is critical)
+SETUP_RUN=${HOME}/icar/helpers/setup_next_run.py
+MAKE_TEMPLATE=${HOME}/icar/helpers/make_template.py
+MKOUTDIR=mkdir #<user_defined_path>/mkscratch.py # mkscratch creates the directory on scratch and links to it
+
+
+
+# --------------------------------------------------
+# SHOULD NOT NEED TO MODIFY ANYTHING BELOW THIS LINE
+# --------------------------------------------------
+
+# if the template file doesn't exist yet, make it
+if [[ ! -e $TEMPLATE ]]; then
+    $MAKE_TEMPLATE $OPTFILE $TEMPLATE > job_output/py_mktemp.out
+fi
+
+# # if the output directory doesn't exist, create it
+# if [[ ! -e $OUTDIR ]]; then
+#     $MKOUTDIR $OUTDIR
+# fi
+
+# if we didn't finish yet we have to continue -BK: but we print this in line 87, so 2 jobs max?
+if [[ ! -e ${PREFIX}_finished ]]; then
+    # first submit the next job dependent on this one
+    # sub -w "ended(${PBS_JOBID})" < $BATCHFILE
+    # qsub -W depend=afterany:${PBS_JOBID} ${BATCHFILE}  ## PBS version
+    sbatch --dependency=afternotok:$SLURM_JOB_ID ${BATCHFILE}
+
+    # if we have run before, setup the appropriate restart options
+    if [[ -e ${PREFIX}_running ]]; then
+        # echo "setting up next run (setup_next_run.py)"
+        $SETUP_RUN $OPTFILE $TEMPLATE > job_output/py_setup.out
+    fi
+
+    # declare that we have run before so the next job will know
+    touch ${PREFIX}_running
+
+    # run the actual executable (e.g. icar options.nml)
+    # cafrun -n 36 $EXE $OPTFILE > job_output/icar_$SLURM_JOB_ID.out
+    cafrun -n 36 $EXE $OPTFILE >> job_output/icar.out ### if you prefer one log file for the icar output
+
+    # typically the job will get killed while icar is running
+    # but for some reason bkilling the job still lets it touch _finished...
+    # maybe this will give it a chance to really kill it first?
+    sleep 10
+
+    # if icar completes, we are done, tell the next job that we finished
+    # BK: don't understand this: won't it prevent the next (or after-next) job from starting (ln 63)?
+    touch ${PREFIX}_finished
+else
+    # if the last job ran to completion, delete the inter-job communication files and exit
+    rm ${PREFIX}_running
+    rm ${PREFIX}_finished
+fi
diff --git a/helpers/erai/config.py b/helpers/erai/config.py
@@ -13,7 +13,7 @@
 def set_bounds(info):
     atm_file=info.atmdir+info.atmfile
     erai_file=atm_file.replace("_Y_","2000").replace("_M_","01").replace("_D_","01").replace("_h_","00")
-    varlist=["g4_lat_0","g4_lon_1"]
+    varlist=["g4_lat_0","g4_lon_1","Z_GDS4_HYBL","T_GDS4_HYBL","Q_GDS4_HYBL","LNSP_GDS4_HYBL","CLWC_GDS4_HYBL","CIWC_GDS4_HYBL","lv_HYBL2_a","lv_HYBL2_b","P0"]
     output_dir=info.nc_file_dir
     try:
         os.mkdir(output_dir)

diff --git a/helpers/erai/convert.py b/helpers/erai/convert.py
@@ -38,6 +38,29 @@ def convert_atm(data):
 
     return output_data
 
+def bfill(arr):
+    ''' Backward-fill NaNs along axis 1 of a 2D array.
+
+    Each NaN is replaced by the nearest non-NaN value to its right in the same
+    row. NaNs with no valid value to their right are left as NaN. The result is
+    a new array; arr itself is not modified.
+
+    from https://stackoverflow.com/questions/41190852/most-efficient-way-to-forward-fill-nan-values-in-numpy-array
+    '''
+    mask = np.isnan(arr)
+    # valid cells keep their own column index; NaN cells start at the last column
+    idx = np.where(~mask, np.arange(mask.shape[1]), mask.shape[1] - 1)
+    # running minimum from right to left: column of the nearest valid cell at or right of each cell
+    idx = np.minimum.accumulate(idx[:, ::-1], axis=1)[:, ::-1]
+    out = arr[np.arange(idx.shape[0])[:,None], idx]
+    return out
+
+
+def numpy_fill(arr):
+    ''' Fill NaNs along the last axis of a 3D array, one 2D slice arr[i] at a time.
+
+    Each NaN first takes the nearest non-NaN value to its left in the same row
+    (forward fill); NaNs remaining at the start of a row are then filled from
+    the right by bfill. Rows that are entirely NaN stay NaN. arr is modified in
+    place and also returned.
+
+    modified from Solution provided by Divakar.
+    from https://stackoverflow.com/questions/41190852/most-efficient-way-to-forward-fill-nan-values-in-numpy-array
+    '''
+    for i in range(arr.shape[0]):
+        mask = np.isnan(arr[i])
+        # valid cells keep their own column index; NaN cells start at column 0
+        idx = np.where(~mask,np.arange(mask.shape[1]),0)
+        # running maximum from left to right: column of the nearest valid cell at or left of each cell
+        np.maximum.accumulate(idx,axis=1, out=idx)
+        out = arr[i,np.arange(idx.shape[0])[:,None], idx]
+        arr[i] = bfill(out) # in case there are still missing values on the left side
+
+    return arr
+
 # icar_sfc_var=["sensible_heat","latent_heat","hgt_98","PBL_height"]
 def convert_sfc(data):
     global last_longwave
@@ -53,6 +76,13 @@ def convert_sfc(data):
     output_data.lw              = data.lw[np.newaxis,::-1,:] / dt   # convert from Joules to W /m^2
     output_data.cp              = data.cp[np.newaxis,::-1,:] * 1000 # convert m to mm
 
+    output_data.landmask = data.landmask[np.newaxis,::-1,:]
+    # landval = data.tskin[np.argmax(data.landmask)] # ~273.15, alternatively, tskin[landmask>0.99].mean()
+    #  above seems to always create an array, and sometimes with very different values in it ... e.g. >300...
+    landval = 273.16
+    output_data["sst"]           = (data.tskin[np.newaxis,::-1,:] - (output_data.landmask * landval)) / (1 - output_data.landmask)
+    output_data["sst"][output_data.landmask>0.25] =np.nan
+    output_data["sst"] = numpy_fill(output_data["sst"])
     # this is now handled in io so it can just use the last value in the file, much simpler
     #  ... though in some ways what is below is better as it integrates over a longer time period
     # if last_longwave==None:

diff --git a/helpers/erai/io_routines.py b/helpers/erai/io_routines.py
@@ -4,8 +4,8 @@
 from bunch import Bunch
 
 
-sfcvarlist=["SSHF_GDS4_SFC","SLHF_GDS4_SFC","Z_GDS4_SFC","BLH_GDS4_SFC","SSRD_GDS4_SFC","STRD_GDS4_SFC", "SKT_GDS4_SFC", "CP_GDS4_SFC"]
-icar_sfc_var=["sensible_heat","latent_heat","hgt_98","PBL_height","sw","lw", "tskin", "cp"]
+sfcvarlist=["SSHF_GDS4_SFC","SLHF_GDS4_SFC","Z_GDS4_SFC","BLH_GDS4_SFC","SSRD_GDS4_SFC","STRD_GDS4_SFC", "SSTK_GDS4_SFC", "CP_GDS4_SFC", "LSM_GDS4_SFC"]
+icar_sfc_var=["sensible_heat","latent_heat","hgt_98","PBL_height","sw","lw", "tskin", "cp","landmask"]
 
 atmvarlist=["Z_GDS4_HYBL","T_GDS4_HYBL","Q_GDS4_HYBL","LNSP_GDS4_HYBL","CLWC_GDS4_HYBL","CIWC_GDS4_HYBL","lv_HYBL2_a","lv_HYBL2_b","P0"]
 icar_atm_var=["gph","t","qv","ln_p_sfc","cloud","ice","sigma_a","sigma_b","P0"]
@@ -92,7 +92,7 @@ def load_atm(time,info):
     """Load atmospheric variable from a GRIB file"""
     uvfile,scfile=find_atm_file(time,info)
     uvnc_file=grib2nc(uvfile,atmuvlist,info.nc_file_dir)
-    scnc_file=grib2nc(scfile,atmvarlist,info.nc_file_dir)
+    scnc_file=grib2nc(scfile,atmvarlist+["g4_lat_0","g4_lon_1"],info.nc_file_dir)
 
     outputdata=Bunch()
     for s,v in zip(icar_uv_var,atmuvlist):

diff --git a/helpers/erai/output.py b/helpers/erai/output.py
@@ -63,9 +63,15 @@ def write_file(date,info,erai):
     atts=Bunch(long_name="Planetary Boundary Layer Height",units="m")
     extra_vars.append(Bunch(name="PBL_height",data=erai["PBL_height"],dims=dims2dt,dtype="f",attributes=atts))
 
+    atts=Bunch(long_name="Land fraction",units="")
+    extra_vars.append(Bunch(name="landfraction",data=erai["landmask"],dims=dims2dt,dtype="f",attributes=atts))
+
     atts=Bunch(long_name="Skin Temperature",units="K")
     extra_vars.append(Bunch(name="tskin",data=erai["tskin"],dims=dims2dt,dtype="f",attributes=atts))
 
+    atts=Bunch(long_name="Sea Surface Temperature",units="K")
+    extra_vars.append(Bunch(name="sst",data=erai["sst"],dims=dims2dt,dtype="f",attributes=atts))
+
     atts=Bunch(long_name="Convective precipitation",units="mm")
     extra_vars.append(Bunch(name="cp",data=erai["cp"],dims=dims2dt,dtype="f",attributes=atts))