From 5fbe5c14c11a7761336bd2a07079a3e3b9978645 Mon Sep 17 00:00:00 2001
From: ChiaraMonforte
Date: Wed, 13 Nov 2024 12:19:33 +0100
Subject: [PATCH 1/4] [REFACTOR] Refactor code, adjust demo and refactor tests

---
 glidertest/plots.py     |  901 +++++++++++++++++++++++++++++++
 glidertest/tools.py     | 1132 +--------------------------------------
 glidertest/utilities.py |  265 +++++++++
 notebooks/demo.ipynb    |   50 +-
 tests/test_plots.py     |   97 ++++
 tests/test_tools.py     |   83 +--
 tests/test_utilities.py |   31 ++
 7 files changed, 1339 insertions(+), 1220 deletions(-)
 create mode 100644 glidertest/plots.py
 create mode 100644 glidertest/utilities.py
 create mode 100644 tests/test_plots.py
 create mode 100644 tests/test_utilities.py

diff --git a/glidertest/plots.py b/glidertest/plots.py
new file mode 100644
index 0000000..c5b7528
--- /dev/null
+++ b/glidertest/plots.py
@@ -0,0 +1,901 @@
+import matplotlib.dates as mdates
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sns
+import xarray as xr
+from matplotlib.dates import DateFormatter
+from pandas import DataFrame
+from scipy import stats
+import matplotlib.colors as mcolors
+import gsw
+import cartopy.crs as ccrs
+import cartopy.feature as cfeature
+import warnings
+from glidertest import utilities
+
+def plot_updown_bias(df: pd.DataFrame, ax: plt.Axes = None, xlabel='Temperature [C]', **kw: dict) -> tuple({plt.Figure, plt.Axes}):
+    """
+    This function can be used to plot the up- and downcast differences computed with the quant_updown_bias function
+
+    Parameters
+    ----------
+    df: pandas dataframe containing dc (Dive - Climb average), cd (Climb - Dive average) and depth
+    ax: axis to plot the data
+    xlabel: label for the x-axis
+
+    Returns
+    -------
+    A line plot comparing the dive and climb averages over depth for the selected variable
+
+    Original author
+    ----------------
+    Chiara Monforte
+    """
+    if ax is None:
+        fig, ax = plt.subplots(figsize=(5, 5))
+    else:
+        fig = plt.gcf()
+
+    if not all(hasattr(df, attr) for attr in ['dc', 'depth']):
+        ax.text(0.5, 0.55, xlabel, va='center', ha='center', transform=ax.transAxes, bbox=dict(facecolor='white', alpha=0.5, edgecolor='none'))
+        ax.text(0.5, 0.45, 'data unavailable', va='center', ha='center', transform=ax.transAxes, bbox=dict(facecolor='white', alpha=0.5, edgecolor='none'))
+    else:
+        ax.plot(df.dc, df.depth, label='Dive-Climb')
+        ax.plot(df.cd, df.depth, label='Climb-Dive')
+        ax.legend(loc=3)
+        lims = np.abs(df.dc)
+        ax.set_xlim(-np.nanpercentile(lims, 99.5), np.nanpercentile(lims, 99.5))
+        ax.set_ylim(df.depth.max() + 1, -df.depth.max() / 30)
+    ax.set_xlabel(xlabel)
+    ax.grid()
+    return fig, ax
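A minimal usage sketch for the relocated function. It assumes `quant_updown_bias` stays in `glidertest.tools` (as in this patch) and that a demo OG1 dataset loader such as `fetchers.load_sample_dataset()` is available; the loader name is an assumption, not part of this diff:

```python
from glidertest import fetchers, tools, plots

ds = fetchers.load_sample_dataset()  # assumed demo-data helper, not part of this patch
df = tools.quant_updown_bias(ds, var='TEMP', v_res=1)  # dive/climb differences per depth bin
fig, ax = plots.plot_updown_bias(df, xlabel='Temperature [C]')
```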
+
+def plot_basic_vars(ds: xr.Dataset, v_res=1, start_prof=0, end_prof=-1):
+    """
+    This function plots the basic oceanographic variables temperature, salinity and density. A second plot is created and filled with oxygen and
+    chlorophyll data if available.
+
+    Parameters
+    ----------
+    ds: xarray in OG1 format containing at least temperature, salinity, density and depth
+    v_res: vertical resolution for the gridding. Horizontal resolution (by profile) is assumed to be 1
+    start_prof: start profile used to compute the means that will be plotted. This can vary in case the dataset spreads over a large timescale
+                or region and only a subsection should be plotted
+    end_prof: end profile used to compute the means that will be plotted. This can vary in case the dataset spreads over a large timescale
+              or region and only a subsection should be plotted
+
+    Returns
+    -------
+    Line plots for the averages of the different variables.
+    The thermocline, halocline and pycnocline are computed and plotted. A sentence stating the depth of the clines is printed too
+
+    Original author
+    ----------------
+    Chiara Monforte
+    """
+    utilities._check_necessary_variables(ds, ['PROFILE_NUMBER', 'DEPTH', 'TEMP', 'PSAL', 'LATITUDE', 'LONGITUDE'])
+    ds = utilities._calc_teos10_variables(ds)
+    p = 1
+    z = v_res
+    tempG, profG, depthG = utilities.construct_2dgrid(ds.PROFILE_NUMBER, ds.DEPTH, ds.TEMP, p, z)
+    salG, profG, depthG = utilities.construct_2dgrid(ds.PROFILE_NUMBER, ds.DEPTH, ds.PSAL, p, z)
+    denG, profG, depthG = utilities.construct_2dgrid(ds.PROFILE_NUMBER, ds.DEPTH, ds.DENSITY, p, z)
+
+
+    tempG = tempG[start_prof:end_prof, :]
+    salG = salG[start_prof:end_prof, :]
+    denG = denG[start_prof:end_prof, :]
+    depthG = depthG[start_prof:end_prof, :]
+
+    halo = utilities.compute_cline(salG, depthG)
+    thermo = utilities.compute_cline(tempG, depthG)
+    pycno = utilities.compute_cline(denG, depthG)
+    print(
+        f'The thermocline, halocline and pycnocline are located at {thermo} m, {halo} m and {pycno} m respectively, as shown in the plots')
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore", category=RuntimeWarning)
+        fig, ax = plt.subplots(1, 2, figsize=(15, 5))
+        ax1 = ax[0].twiny()
+        ax2 = ax[0].twiny()
+        ax2.spines["top"].set_position(("axes", 1.2))
+        ax[0].plot(np.nanmean(tempG, axis=0), depthG[0, :], c='blue')
+        ax1.plot(np.nanmean(salG, axis=0), depthG[0, :], c='red')
+        ax2.plot(np.nanmean(denG, axis=0), depthG[0, :], c='black')
+        ax[0].axhline(thermo, linestyle='dashed', c='blue')
+        ax1.axhline(halo, linestyle='dashed', c='red')
+        ax2.axhline(pycno, linestyle='dashed', c='black')
+
+        ax[0].set(xlabel=f'Average Temperature [C] \nbetween profile {start_prof} and {end_prof}', ylabel='Depth (m)')
+        ax[0].tick_params(axis='x', colors='blue')
+        ax[0].xaxis.label.set_color('blue')
+        ax1.spines['bottom'].set_color('blue')
+        ax1.set(xlabel=f'Average Salinity [PSU] \nbetween profile {start_prof} and {end_prof}')
+        ax1.xaxis.label.set_color('red')
+        ax1.spines['top'].set_color('red')
+        ax1.tick_params(axis='x', colors='red')
+        ax2.spines['bottom'].set_color('black')
+        ax2.set(xlabel=f'Average Density [kg m-3] \nbetween profile {start_prof} and {end_prof}')
+        ax2.xaxis.label.set_color('black')
+        ax2.spines['top'].set_color('black')
+        ax2.tick_params(axis='x', colors='black')
+
+        if 'CHLA' in ds.variables:
+            chlaG, profG, depthG = utilities.construct_2dgrid(ds.PROFILE_NUMBER, ds.DEPTH, ds.CHLA, p, z)
+            chlaG = chlaG[start_prof:end_prof, :]
+            ax2_1 = ax[1].twiny()
+            ax2_1.plot(np.nanmean(chlaG, axis=0), depthG[0, :], c='green')
+            ax2_1.set(xlabel=f'Average Chlorophyll-a [mg m-3] \nbetween profile {start_prof} and {end_prof}')
+            ax2_1.xaxis.label.set_color('green')
+            ax2_1.spines['top'].set_color('green')
+            ax2_1.tick_params(axis='x', colors='green')
+        else:
+            ax[1].text(0.3, 0.7, 'Chlorophyll data unavailable', va='top', transform=ax[1].transAxes)
+
+        if 'DOXY' in ds.variables:
+            oxyG, profG, depthG = utilities.construct_2dgrid(ds.PROFILE_NUMBER, ds.DEPTH, ds.DOXY, p, z)
+            oxyG = oxyG[start_prof:end_prof, :]
+            ax[1].plot(np.nanmean(oxyG, axis=0), depthG[0, :], c='orange')
+            ax[1].set(xlabel=f'Average Oxygen [mmol m-3] \nbetween profile {start_prof} and {end_prof}')
+            ax[1].xaxis.label.set_color('orange')
+            ax[1].spines['top'].set_color('orange')
+            ax[1].tick_params(axis='x', colors='orange')
+            ax[1].spines['bottom'].set_color('orange')
+        else:
+            ax[1].text(0.3, 0.5, 'Oxygen data unavailable', va='top', transform=ax[1].transAxes)
+
+        [a.set_ylim(depthG.max() + 10, -5) for a in ax]
+        [a.grid() for a in ax]
+    return fig, ax
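A usage sketch under the same assumption about the demo-data loader:

```python
from glidertest import fetchers, plots

ds = fetchers.load_sample_dataset()  # assumed demo-data helper
fig, ax = plots.plot_basic_vars(ds, v_res=1, start_prof=0, end_prof=-1)
```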
+
+
+def process_optics_assess(ds, var='CHLA'):
+    """
+    Function to visually assess any drift in deep optics data and the presence of any possible negative data. This function returns both plots and text
+
+    Parameters
+    ----------
+    ds: xarray dataset in OG1 format containing at least time, depth and the selected optical variable
+    var: name of the selected variable
+
+    Returns
+    -------
+    Text giving info on where and when negative data was observed
+    Plot showing bottom data with a linear regression line to highlight any drift
+
+    Original author
+    ----------------
+    Chiara Monforte
+    """
+    utilities._check_necessary_variables(ds, [var, 'TIME', 'DEPTH'])
+    # Check how much negative data there is
+    neg_chl = np.round((len(np.where(ds[var] < 0)[0]) * 100) / len(ds[var]), 1)
+    if neg_chl > 0:
+        print(f'{neg_chl}% of scaled {var} data is negative, consider recalibrating data')
+        # Check where the negative values occur and if we just see them at specific times of the mission or not
+        start = ds.TIME[np.where(ds[var] < 0)][0]
+        end = ds.TIME[np.where(ds[var] < 0)][-1]
+        min_z = ds.DEPTH[np.where(ds[var] < 0)].min().values
+        max_z = ds.DEPTH[np.where(ds[var] < 0)].max().values
+        print(f'Negative data is present from {str(start.values)[:16]} to {str(end.values)[:16]}')
+        print(f'Negative data is present between {"%.1f" % np.round(min_z, 1)} m and {"%.1f" % np.round(max_z, 1)} m')
+    else:
+        print(f'There is no negative scaled {var} data, recalibration and further checks are still recommended')
+    # Check if there is any missing data throughout the mission
+    if len(ds.TIME) != len(ds[var].dropna(dim='N_MEASUREMENTS').TIME):
+        print(f'{var} data is missing for part of the mission')  # Add to specify where the gaps are
+    else:
+        print(f'{var} data is present for the entire mission duration')
+    # Check bottom dark count and any drift there
+    bottom_opt_data = ds[var].where(ds[var].DEPTH > ds.DEPTH.max() - (ds.DEPTH.max() * 0.1)).dropna(
+        dim='N_MEASUREMENTS')
+    slope, intercept, r_value, p_value, std_err = stats.linregress(np.arange(0, len(bottom_opt_data)), bottom_opt_data)
+    ax = sns.regplot(data=ds, x=np.arange(0, len(bottom_opt_data)), y=bottom_opt_data,
+                     scatter_kws={"color": "grey"},
+                     line_kws={"color": "red", "label": "y={0:.8f}x+{1:.5f}".format(slope, intercept)},
+                     )
+    ax.legend(loc=2)
+    ax.grid()
+    ax.set(ylim=(np.nanpercentile(bottom_opt_data, 0.5), np.nanpercentile(bottom_opt_data, 99.5)),
+           xlabel='Measurements',
+           ylabel=var)
+    percentage_change = (((slope * len(bottom_opt_data) + intercept) - intercept) / abs(intercept)) * 100
+
+    if abs(percentage_change) >= 1:
+        print(
+            'The deepest 10% of the data has been analysed and does not seem perfectly stable. An alternative solution for dark counts has to be considered. \nMoreover, it is recommended to check the sensor as this may suggest issues with the sensor (i.e. water inside the sensor, temporal drift etc.)')
+        print(
+            f'Data changed (increased or decreased) by {"%.1f" % np.round(percentage_change, 1)}% from the beginning to the end of the mission')
+    else:
+        print(
+            f'The deepest 10% of the data has been analysed and seems stable. These deep values can be used to re-assess the dark count if the assumption of no {var} at depth is valid at this site and depth')
+    return ax
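A sketch of the optics check (CHLA is the default; any optical variable present in the dataset should work):

```python
from glidertest import fetchers, plots

ds = fetchers.load_sample_dataset()  # assumed demo-data helper
ax = plots.process_optics_assess(ds, var='CHLA')  # prints diagnostics and returns the regression axis
```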
+
+
+def plot_daynight_avg(day: pd.DataFrame, night: pd.DataFrame, ax: plt.Axes = None, sel_day=None,
+                      xlabel='Chlorophyll [mg m-3]', **kw: dict) -> tuple({plt.Figure, plt.Axes}):
+    """
+    This function can be used to plot the day and night averages computed with the compute_daynight_avg function
+
+    Parameters
+    ----------
+    day: pandas dataframe containing the day averages
+    night: pandas dataframe containing the night averages
+    ax: axis to plot the data
+    sel_day: selected day to plot. Defaults to the median day
+    xlabel: label for the x-axis
+
+    Returns
+    -------
+    A line plot comparing the day and night averages over depth for the selected day
+
+    Original author
+    ----------------
+    Chiara Monforte
+
+    """
+    if not sel_day:
+        dates = list(day.date.dropna().values) + list(night.date.dropna().values)
+        dates.sort()
+        sel_day = dates[int(len(dates)/2)]
+    if ax is None:
+        fig, ax = plt.subplots(figsize=(5, 5))
+    else:
+        fig = plt.gcf()
+    ax.plot(night.where(night.date == sel_day).dropna().dat, night.where(night.date == sel_day).dropna().depth,
+            label='Night-time average')
+    ax.plot(day.where(day.date == sel_day).dropna().dat, day.where(day.date == sel_day).dropna().depth,
+            label='Daytime average')
+    ax.legend()
+    ax.invert_yaxis()
+    ax.grid()
+    ax.set(xlabel=xlabel, ylabel='Depth [m]')
+    ax.set_title(sel_day)
+    return fig, ax
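The day/night pair is produced by `compute_daynight_avg`, which stays in `glidertest.tools` in this patch; a sketch:

```python
from glidertest import fetchers, tools, plots

ds = fetchers.load_sample_dataset()  # assumed demo-data helper
day, night = tools.compute_daynight_avg(ds, sel_var='CHLA')
fig, ax = plots.plot_daynight_avg(day, night, xlabel='Chlorophyll [mg m-3]')
```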
+
+
+def plot_quench_assess(ds: xr.Dataset, sel_var: str, ax: plt.Axes = None, start_time=None,
+                       end_time=None, start_prof=None, end_prof=None, ylim=45, **kw: dict) -> tuple({plt.Figure, plt.Axes}):
+    """
+    This function can be used to plot sections for any variable with the sunrise and sunset plotted over
+
+    Parameters
+    ----------
+    ds: xarray in OG1 format containing at least time, depth, latitude, longitude and the selected variable.
+        Data should not be gridded.
+    sel_var: selected variable to plot
+    ax: axis to plot the data
+    start_time: Start date of the data selection, format 'YYYY-MM-DD'. As missions can be long, a shorter selection can make it easier to visualise the NPQ effect.
+                Defaults to the central 4 days of the deployment
+    end_time: End date of the data selection, format 'YYYY-MM-DD'. As missions can be long, a shorter selection can make it easier to visualise the NPQ effect.
+              Defaults to the central 4 days of the deployment
+    start_prof: Start profile of the data selection. If no profile is specified, the time selection will be used or the central 4 days of the deployment
+    end_prof: End profile of the data selection. If no profile is specified, the time selection will be used or the central 4 days of the deployment
+    ylim: specified limit for the maximum y-axis value. The minimum is computed as ylim/30
+
+    Returns
+    -------
+    A section showing the variability of the selected data over time and depth
+
+    Original author
+    ----------------
+    Chiara Monforte
+    """
+    utilities._check_necessary_variables(ds, ['TIME', sel_var, 'DEPTH'])
+    if ax is None:
+        fig, ax = plt.subplots(figsize=(5, 5))
+    else:
+        fig = plt.gcf()
+
+    if "TIME" not in ds.indexes.keys():
+        ds = ds.set_xindex('TIME')
+
+    if not start_time:
+        start_time = ds.TIME.mean() - np.timedelta64(2, 'D')
+    if not end_time:
+        end_time = ds.TIME.mean() + np.timedelta64(2, 'D')
+
+    if start_prof and end_prof:
+        t1 = ds.TIME.where(ds.PROFILE_NUMBER == start_prof).dropna(dim='N_MEASUREMENTS')[0]
+        t2 = ds.TIME.where(ds.PROFILE_NUMBER == end_prof).dropna(dim='N_MEASUREMENTS')[-1]
+        ds_sel = ds.sel(TIME=slice(t1, t2))
+    else:
+        ds_sel = ds.sel(TIME=slice(start_time, end_time))
+
+    if len(ds_sel.TIME) == 0:
+        msg = f"supplied limits start_time: {start_time} end_time: {end_time} do not overlap with dataset TIME range {str(ds.TIME.values.min())[:10]} - {str(ds.TIME.values.max())[:10]}"
+        raise ValueError(msg)
+
+    sunrise, sunset = utilities.compute_sunset_sunrise(ds_sel.TIME, ds_sel.LATITUDE, ds_sel.LONGITUDE)
+
+    c = ax.scatter(ds_sel.TIME, ds_sel.DEPTH, c=ds_sel[sel_var], s=10, vmin=np.nanpercentile(ds_sel[sel_var], 0.5),
+                   vmax=np.nanpercentile(ds_sel[sel_var], 99.5))
+    ax.set_ylim(ylim, -ylim / 30)
+    for n in np.unique(sunset):
+        ax.axvline(np.unique(n), c='blue')
+    for m in np.unique(sunrise):
+        ax.axvline(np.unique(m), c='orange')
+    ax.set_ylabel('Depth [m]')
+    plt.colorbar(c, label=f'{sel_var} [{ds[sel_var].units}]')
+    return fig, ax
+
+
+def check_temporal_drift(ds: xr.Dataset, var: str, ax: plt.Axes = None, **kw: dict) -> tuple({plt.Figure, plt.Axes}):
+    """
+    This function can be used to check the selected variable for possible temporal drift over the mission
+
+    Parameters
+    ----------
+    ds: xarray in OG1 format containing at least time, depth and the selected variable.
+        Data should not be gridded.
+    var: selected variable to plot
+    ax: axis to plot the data
+
+    Returns
+    -------
+    A figure with two subplots. One is a scatter of the data over time. The other one is a scatter of data from the variable
+    over depth, colored by date
+
+    Original author
+    ----------------
+    Chiara Monforte
+    """
+    utilities._check_necessary_variables(ds, ['TIME', var, 'DEPTH'])
+    if ax is None:
+        fig, ax = plt.subplots(1, 2, figsize=(14, 6))
+    else:
+        fig = plt.gcf()
+
+    ax[0].scatter(mdates.date2num(ds.TIME), ds[var], s=10)
+    ax[0].xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
+    ax[0].set(ylim=(np.nanpercentile(ds[var], 0.01), np.nanpercentile(ds[var], 99.99)), ylabel=var)
+
+    c = ax[1].scatter(ds[var], ds.DEPTH, c=mdates.date2num(ds.TIME), s=10)
+    ax[1].set(xlim=(np.nanpercentile(ds[var], 0.01), np.nanpercentile(ds[var], 99.99)), ylabel='Depth (m)', xlabel=var)
+    ax[1].invert_yaxis()
+
+    [a.grid() for a in ax]
+    plt.colorbar(c, format=DateFormatter('%b %d'))
+    return fig, ax
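Since the function accepts pre-made axes, it can be dropped into a larger figure; a sketch:

```python
import matplotlib.pyplot as plt
from glidertest import fetchers, plots

ds = fetchers.load_sample_dataset()  # assumed demo-data helper
fig, ax = plt.subplots(1, 2, figsize=(14, 6))  # check_temporal_drift expects two axes
plots.check_temporal_drift(ds, var='DOXY', ax=ax)
```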
+
+
+def plot_prof_monotony(ds: xr.Dataset, ax: plt.Axes = None, **kw: dict) -> tuple({plt.Figure, plt.Axes}):
+
+    """
+    This function can be used to plot the profile number and check for any possible issues with the profile index assigned.
+
+    Parameters
+    ----------
+    ds: xarray dataset in OG1 format with at least PROFILE_NUMBER, TIME, DEPTH. Data should not be gridded
+    ax: axis to plot the data
+
+    Returns
+    -------
+    Two plots, one line plot with the profile number over time (expected to be always increasing). A
+    second plot which is a scatter plot showing at which depth over time there was a profile index where the
+    difference was neither 0 nor 1 (meaning there are possibly issues with how the profile index was assigned).
+
+    Original author
+    ----------------
+    Chiara Monforte
+
+    """
+    utilities._check_necessary_variables(ds, ['TIME', 'PROFILE_NUMBER', 'DEPTH'])
+    if ax is None:
+        fig, ax = plt.subplots(2, 1, figsize=(10, 5), sharex=True)
+    else:
+        fig = plt.gcf()
+
+    ax[0].plot(ds.TIME, ds.PROFILE_NUMBER)
+    ax[0].set(ylabel='Profile_Number')
+    if len(np.where((np.diff(ds.PROFILE_NUMBER) != 0) & (np.diff(ds.PROFILE_NUMBER) != 1))[0]) == 0:
+        ax[1].text(0.2, 0.5, 'Data is monotonically increasing and no issues can be observed',
+                   transform=ax[1].transAxes)
+    else:
+        ax[1].scatter(ds.TIME[np.where((np.diff(ds.PROFILE_NUMBER) != 0) & (np.diff(ds.PROFILE_NUMBER) != 1))],
+                      ds.DEPTH[np.where((np.diff(ds.PROFILE_NUMBER) != 0) & (np.diff(ds.PROFILE_NUMBER) != 1))],
+                      s=10, c='red', label='Depth at which we have issues \n with the profile number assigned')
+        ax[1].legend()
+    ax[1].set(ylabel='Depth')
+    ax[1].invert_yaxis()
+    ax[1].xaxis.set_major_locator(plt.MaxNLocator(8))
+    [a.grid() for a in ax]
+    return fig, ax
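This pairs naturally with the non-plotting `check_monotony` test that remains in `glidertest.tools`; a sketch:

```python
from glidertest import fetchers, tools, plots

ds = fetchers.load_sample_dataset()  # assumed demo-data helper
tools.check_monotony(ds.PROFILE_NUMBER)  # prints whether the profile index ever decreases
fig, ax = plots.plot_prof_monotony(ds)
```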
+
+
+def plot_glider_track(ds: xr.Dataset, ax: plt.Axes = None, **kw: dict) -> tuple({plt.Figure, plt.Axes}):
+    """
+    This function plots the glider track on a map, with latitude and longitude colored by time.
+
+    Parameters
+    ----------
+    ds: xarray in OG1 format with at least TIME, LATITUDE, and LONGITUDE.
+    ax: Optional; axis to plot the data.
+    kw: Optional; additional keyword arguments for the scatter plot.
+
+    Returns
+    -------
+    One plot with the map of the glider track.
+    fig: matplotlib.figure.Figure
+    ax: matplotlib.axes._subplots.AxesSubplot
+
+    Original author
+    ----------------
+    Eleanor Frajka-Williams
+    """
+    utilities._check_necessary_variables(ds, ['TIME', 'LONGITUDE', 'LATITUDE'])
+    if ax is None:
+        fig, ax = plt.subplots(figsize=(10, 6), subplot_kw={'projection': ccrs.PlateCarree()})
+    else:
+        fig = plt.gcf()
+
+    latitudes = ds.LATITUDE.values
+    longitudes = ds.LONGITUDE.values
+    times = ds.TIME.values
+
+    # Reduce the number of latitudes, longitudes, and times to no more than 2000
+    if len(latitudes) > 2000:
+        indices = np.linspace(0, len(latitudes) - 1, 2000).astype(int)
+        latitudes = latitudes[indices]
+        longitudes = longitudes[indices]
+        times = times[indices]
+
+    # Plot latitude and longitude colored by time
+    sc = ax.scatter(longitudes, latitudes, c=times, cmap='viridis', **kw)
+
+    # Add colorbar with formatted time labels
+    cbar = plt.colorbar(sc, ax=ax)
+    cbar.ax.set_yticklabels([pd.to_datetime(t).strftime('%Y-%b-%d') for t in cbar.get_ticks()])
+
+    ax.set_extent([np.min(longitudes) - 1, np.max(longitudes) + 1, np.min(latitudes) - 1, np.max(latitudes) + 1], crs=ccrs.PlateCarree())
+
+    # Add features to the map
+    ax.add_feature(cfeature.LAND)
+    ax.add_feature(cfeature.OCEAN)
+    ax.add_feature(cfeature.COASTLINE)
+
+    ax.set_xlabel('Longitude')
+    ax.set_ylabel('Latitude')
+    ax.set_title('Glider Track')
+    gl = ax.gridlines(draw_labels=True, color='black', alpha=0.5, linestyle='--')
+    gl.top_labels = False
+    gl.right_labels = False
+    plt.show()
+
+    return fig, ax
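Extra keyword arguments are forwarded to `scatter`, so marker style can be tuned; a sketch:

```python
from glidertest import fetchers, plots

ds = fetchers.load_sample_dataset()  # assumed demo-data helper
fig, ax = plots.plot_glider_track(ds, s=2)  # 's' is forwarded to matplotlib's scatter
```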
+
+def plot_grid_spacing(ds: xr.Dataset, ax: plt.Axes = None, **kw: dict) -> tuple({plt.Figure, plt.Axes}):
+    """
+    This function plots histograms of the grid spacing (diff(ds.DEPTH) and diff(ds.TIME)), where only the inner 99% of values are plotted.
+
+    Parameters
+    ----------
+    ds: xarray in OG1 format with at least TIME and DEPTH.
+    ax: Optional; axis to plot the data.
+    kw: Optional; additional keyword arguments for the histograms.
+
+    Returns
+    -------
+    Two histograms showing the distribution of grid spacing for depth and time.
+    fig: matplotlib.figure.Figure
+    ax: matplotlib.axes._subplots.AxesSubplot
+
+    Original author
+    ----------------
+    Eleanor Frajka-Williams
+    """
+    utilities._check_necessary_variables(ds, ['TIME', 'DEPTH'])
+    if ax is None:
+        fig, ax = plt.subplots(1, 2, figsize=(14, 6))
+    else:
+        fig = plt.gcf()
+    # Set font sizes for all annotations
+    def_font_size = 14
+
+    # Calculate the depth and time differences
+    depth_diff = np.diff(ds.DEPTH)
+    orig_time_diff = np.diff(ds.TIME) / np.timedelta64(1, 's')  # Convert to seconds
+
+    # Remove NaN values
+    depth_diff = depth_diff[np.isfinite(depth_diff)]
+    time_diff = orig_time_diff[np.isfinite(orig_time_diff)]
+
+    # Calculate some statistics (using original data)
+    median_neg_depth_diff = np.median(depth_diff[depth_diff < 0])
+    median_pos_depth_diff = np.median(depth_diff[depth_diff > 0])
+    max_depth_diff = np.max(depth_diff)
+    min_depth_diff = np.min(depth_diff)
+
+    median_time_diff = np.median(orig_time_diff)
+    mean_time_diff = np.mean(orig_time_diff)
+    max_time_diff = np.max(orig_time_diff)
+    min_time_diff = np.min(orig_time_diff)
+    max_time_diff_hrs = max_time_diff/3600
+
+    # Remove the top and bottom 0.5% of values to get a better histogram
+    # This is hiding some data from the user
+    depth_diff = depth_diff[(depth_diff >= np.nanpercentile(depth_diff, 0.5)) & (depth_diff <= np.nanpercentile(depth_diff, 99.5))]
+    time_diff = time_diff[(time_diff >= np.nanpercentile(time_diff, 0.5)) & (time_diff <= np.nanpercentile(time_diff, 99.5))]
+    print('Depth and time differences have been filtered to the middle 99% of values.')
+    print('Numeric median/mean/max/min values are based on the original data.')
+
+    # Histogram of depth spacing
+    ax[0].hist(depth_diff, bins=50, **kw)
+    ax[0].set_xlabel('Depth Spacing (m)')
+    ax[0].set_ylabel('Frequency')
+    ax[0].set_title('Histogram of Depth Spacing')
+
+    annotation_text_left = (
+        f'Median Negative: {median_neg_depth_diff:.2f} m\n'
+        f'Median Positive: {median_pos_depth_diff:.2f} m\n'
+        f'Max: {max_depth_diff:.2f} m\n'
+        f'Min: {min_depth_diff:.2f} m'
+    )
+    # Determine the best location for the annotation based on the x-axis limits
+    x_upper_limit = ax[0].get_xlim()[1]
+    x_lower_limit = ax[0].get_xlim()[0]
+    if abs(x_lower_limit) > abs(x_upper_limit):
+        annotation_loc = (0.04, 0.96)  # Top left
+        ha = 'left'
+    else:
+        annotation_loc = (0.96, 0.96)  # Top right
+        ha = 'right'
+    ax[0].annotate(annotation_text_left, xy=annotation_loc, xycoords='axes fraction',
+                   fontsize=def_font_size, ha=ha, va='top',
+                   bbox=dict(boxstyle='round,pad=0.3', edgecolor='black', facecolor='white', alpha=.5))
+
+    # Histogram of time spacing
+    ax[1].hist(time_diff, bins=50, **kw)
+    ax[1].set_xlabel('Time Spacing (s)')
+    ax[1].set_ylabel('Frequency')
+    ax[1].set_title('Histogram of Time Spacing')
+
+    annotation_text = (
+        f'Median: {median_time_diff:.2f} s\n'
+        f'Mean: {mean_time_diff:.2f} s\n'
+        f'Max: {max_time_diff:.2f} s ({max_time_diff_hrs:.2f} hr)\n'
+        f'Min: {min_time_diff:.2f} s'
+    )
+    ax[1].annotate(annotation_text, xy=(0.96, 0.96), xycoords='axes fraction',
+                   fontsize=def_font_size, ha='right', va='top',
+                   bbox=dict(boxstyle='round,pad=0.3', edgecolor='black', facecolor='white', alpha=.5))
+
+    # Set font sizes for all annotations
+    # Font size 14 looks roughly like fontsize 8 when I drop this figure in Word - a bit small
+    # Font size 14 looks like fontsize 13 when I drop the top *half* of this figure in powerpoint - acceptable
+    for axes in ax:
+        axes.xaxis.label.set_size(def_font_size)
+        axes.yaxis.label.set_size(def_font_size)
+        axes.tick_params(axis='both', which='major', labelsize=def_font_size)
+        # More subtle grid lines
+        axes.grid(True, which='both', linestyle='--', linewidth=0.5, color='grey')
+
+    return fig, ax
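A sketch; keyword arguments here are forwarded to the two `hist` calls:

```python
from glidertest import fetchers, plots

ds = fetchers.load_sample_dataset()  # assumed demo-data helper
fig, ax = plots.plot_grid_spacing(ds, color='grey')  # 'color' is forwarded to matplotlib's hist
```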
+
+def plot_ts(ds: xr.Dataset, ax: plt.Axes = None, **kw: dict) -> tuple({plt.Figure, plt.Axes}):
+    """
+    This function plots histograms of temperature and salinity values (middle 99%), and a 2D histogram of salinity and temperature with density contours.
+
+    Parameters
+    ----------
+    ds: xarray in OG1 format with at least TEMP and PSAL.
+    ax: Optional; axis to plot the data.
+    kw: Optional; additional keyword arguments for the histograms.
+
+    Returns
+    -------
+    Three plots: histogram of temperature, histogram of salinity, and 2D histogram of salinity and temperature with density contours.
+    fig: matplotlib.figure.Figure
+    ax: matplotlib.axes._subplots.AxesSubplot
+
+    Original author
+    ----------------
+    Eleanor Frajka-Williams
+    """
+    utilities._check_necessary_variables(ds, ['DEPTH', 'LONGITUDE', 'LATITUDE', 'PSAL', 'TEMP'])
+
+    if ax is None:
+        fig, ax = plt.subplots(1, 3, figsize=(14, 4.5))
+    else:
+        fig = plt.gcf()
+    # Set font sizes for all annotations
+    def_font_size = 14
+    num_bins = 30
+
+    temp_orig = ds.TEMP.values
+    sal_orig = ds.PSAL.values
+
+    # Reduce both to where both are finite
+    temp = temp_orig[np.isfinite(temp_orig) & np.isfinite(sal_orig)]
+    sal = sal_orig[np.isfinite(sal_orig) & np.isfinite(temp_orig)]
+    depth = ds.DEPTH[np.isfinite(sal_orig) & np.isfinite(temp_orig)]
+    long = ds.LONGITUDE[np.isfinite(sal_orig) & np.isfinite(temp_orig)]
+    lat = ds.LATITUDE[np.isfinite(sal_orig) & np.isfinite(temp_orig)]
+
+    SA = gsw.SA_from_SP(sal, depth, long, lat)
+    CT = gsw.CT_from_t(SA, temp, depth)
+
+    # Reduce to middle 99% of values
+    # This helps a lot for plotting, but is also hiding some of the data (not great for a test)
+    CT_filtered = CT[(CT >= np.nanpercentile(CT, .5)) & (CT <= np.nanpercentile(CT, 99.5))]
+    SA_filtered = SA[(SA >= np.nanpercentile(SA, .5)) & (SA <= np.nanpercentile(SA, 99.5))]
+    print('Temperature and Salinity values have been filtered to the middle 99% of values.')
+
+    # Calculate density to add contours
+    xi = np.linspace(SA_filtered.values.min() - .2, SA_filtered.values.max() + .2, 100)
+    yi = np.linspace(CT_filtered.values.min() - .2, CT_filtered.values.max() + .2, 100)
+    xi, yi = np.meshgrid(xi, yi)
+    zi = gsw.sigma0(xi, yi)
+
+    # Temperature histogram
+    ax[0].hist(CT_filtered, bins=num_bins, **kw)
+    ax[0].set_xlabel('Conservative Temperature (°C)')
+    ax[0].set_ylabel('Frequency')
+    ax[0].set_title('Histogram of Temperature')
+    ax[0].set_xlim(CT_filtered.min(), CT_filtered.max())
+
+    # Salinity histogram
+    ax[1].hist(SA_filtered, bins=num_bins, **kw)
+    ax[1].set_xlabel('Absolute Salinity (g kg$^{-1}$)')
+    ax[1].set_ylabel('Frequency')
+    ax[1].set_title('Histogram of Salinity')
+    ax[1].set_xlim(SA_filtered.min(), SA_filtered.max())
+
+    # 2-d T-S histogram
+    h = ax[2].hist2d(SA_filtered, CT_filtered, bins=num_bins, cmap='viridis', norm=mcolors.LogNorm(), **kw)
+    cs = ax[2].contour(xi, yi, zi, colors='black', alpha=0.5, linewidths=0.5)
+    ax[2].clabel(cs, inline=True, fontsize=def_font_size - 2)
+    cbar = plt.colorbar(h[3], ax=ax[2])
+    cbar.set_label('Log Counts')
+    ax[2].set_xlabel('Absolute Salinity (g kg$^{-1}$)')
+    ax[2].set_ylabel('Conservative Temperature (°C)')
+    ax[2].set_title('2D Histogram (Log Scale)')
+    # Set x-limits based on salinity plot and y-limits based on temperature plot
+    ax[2].set_xlim(ax[1].get_xlim())
+    ax[2].set_ylim(ax[0].get_xlim())
+
+    # Set font sizes for all annotations
+    for axes in ax:
+        axes.xaxis.label.set_size(def_font_size)
+        axes.yaxis.label.set_size(def_font_size)
+        axes.tick_params(axis='both', which='major', labelsize=def_font_size)
+        axes.grid(True, which='both', linestyle='--', linewidth=0.5, color='grey')
+
+
+    # Adjust the width of ax[1] to match the size of the frame of ax[2]
+    box1 = ax[1].get_position()
+    box2 = ax[2].get_position()
+    dw = box1.width - box2.width
+    ax[1].set_position([box1.x0 + dw, box1.y0, box2.width, box1.height])
+    # Adjust the height of ax[2] to match the width of ax[0]
+    box0 = ax[0].get_position()
+    dh = box0.height - box2.height
+    ax[2].set_position([box2.x0, box2.y0 - dh, box2.width, box0.width])
+    # Rescale the height of ax[2] so the panel appears square on the rendered figure
+    box0 = ax[0].get_position()
+    box2 = ax[2].get_position()
+    fig_width, fig_height = fig.get_size_inches()
+    new_height = box0.width * fig_width / fig_height
+    ax[2].set_position([box2.x0, box2.y0, box2.width, new_height])
+
+    return fig, ax
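A sketch:

```python
from glidertest import fetchers, plots

ds = fetchers.load_sample_dataset()  # assumed demo-data helper
fig, ax = plots.plot_ts(ds)  # prints a note that values are trimmed to the middle 99%
```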
+
+def plot_vertical_speeds_with_histograms(ds, start_prof=None, end_prof=None):
+    """
+    Plot vertical speeds with histograms for diagnostic purposes.
+    This function generates a diagnostic plot for the calculation of vertical seawater velocity.
+    It plots the modelled and computed (from dz/dt) vertical velocities as line plots, where these
+    should be similar. The difference between these velocities is the implied seawater velocity,
+    which should be closer to zero than the vehicle velocities. The histogram provides a visual
+    representation to identify any biases. The final calculation of the median should be close to
+    zero if a large enough sample of dives is input and if the glider flight model is well-tuned.
+
+    Parameters
+    ----------
+    ds (xarray.Dataset): The dataset containing the vertical speed data, where
+        - GLIDER_VERT_VELO_MODEL is the modelled glider speed
+        - GLIDER_VERT_VELO_DZDT is the computed glider speed from the pressure sensor
+        - VERT_CURR_MODEL is the implied seawater velocity.
+    start_prof (int, optional): The starting profile number for subsetting the dataset. Defaults to the mean profile number minus 10.
+    end_prof (int, optional): The ending profile number for subsetting the dataset. Defaults to the mean profile number plus 10.
+
+    Returns
+    -------
+    fig, axs (tuple): The figure and axes objects for the plot.
+
+    Original author
+    ----------------
+    Eleanor Frajka-Williams
+    """
+    utilities._check_necessary_variables(ds, ['GLIDER_VERT_VELO_MODEL', 'GLIDER_VERT_VELO_DZDT', 'VERT_CURR_MODEL', 'PROFILE_NUMBER'])
+
+    if start_prof is None:
+        start_prof = int(ds['PROFILE_NUMBER'].values.mean()) - 10
+
+    if end_prof is None:
+        end_prof = int(ds['PROFILE_NUMBER'].values.mean()) + 10
+
+    ds = ds.where((ds['PROFILE_NUMBER'] >= start_prof) & (ds['PROFILE_NUMBER'] <= end_prof), drop=True)
+    vert_curr = ds.VERT_CURR_MODEL.values * 100  # Convert to cm/s
+    vert_dzdt = ds.GLIDER_VERT_VELO_DZDT.values * 100  # Convert to cm/s
+    vert_model = ds.GLIDER_VERT_VELO_MODEL.values * 100  # Convert to cm/s
+
+    # Calculate the median line for the lower right histogram
+    median_vert_sw_speed = np.nanmedian(vert_curr)
+
+    # Create a dictionary to map the variable names to their labels for legends
+    labels_dict = {
+        'vert_dzdt': 'w$_{meas}$ (from dz/dt)',
+        'vert_model': 'w$_{model}$ (flight model)',
+        'vert_curr': 'w$_{sw}$ (calculated)'
+    }
+
+    fig, axs = plt.subplots(2, 2, figsize=(14, 12), gridspec_kw={'width_ratios': [3, 1]})
+
+    # Upper left subplot for vertical velocity and glider speed
+    ax1 = axs[0, 0]
+    ax1.axhline(0, color='gray', linestyle='-', linewidth=0.5)  # Add zero horizontal line
+    ax1.plot(ds['TIME'], vert_dzdt, label=labels_dict['vert_dzdt'])
+    ax1.plot(ds['TIME'], vert_model, color='r', label=labels_dict['vert_model'])
+    ax1.plot(ds['TIME'], vert_curr, color='g', label=labels_dict['vert_curr'])
+    # Annotations
+    ax1.set_xlabel('Time')
+    ax1.set_ylabel('Vertical Velocity (cm/s)')
+    ax1.xaxis.set_major_formatter(DateFormatter('%d-%b'))
+    ax1.legend(loc='lower right')
+
+    # Upper right subplot for histogram of vertical velocity
+    ax1_hist = axs[0, 1]
+    ax1_hist.hist(vert_dzdt, bins=50, orientation='horizontal', alpha=0.5, color='blue', label=labels_dict['vert_dzdt'])
+    ax1_hist.hist(vert_model, bins=50, orientation='horizontal', alpha=0.5, color='red', label=labels_dict['vert_model'])
+    ax1_hist.hist(vert_curr, bins=50, orientation='horizontal', alpha=0.5, color='green', label=labels_dict['vert_curr'])
+    ax1_hist.set_xlabel('Frequency')
+
+    # Determine the best location for the legend based on the y-axis limits and zero
+    y_upper_limit = ax1_hist.get_ylim()[1]
+    y_lower_limit = ax1_hist.get_ylim()[0]
+    if abs(y_upper_limit) > abs(y_lower_limit):
+        legend_loc = 'upper right'
+    else:
+        legend_loc = 'lower right'
+    ax1_hist.legend(loc=legend_loc)
+
+    # Lower left subplot for vertical water speed
+    ax2 = axs[1, 0]
+    ax2.axhline(0, color='gray', linestyle='-', linewidth=0.5)  # Add zero horizontal line
+    ax2.plot(ds['TIME'], vert_curr, 'g', label=labels_dict['vert_curr'])
+    # Annotations
+    ax2.set_xlabel('Time')
+    ax2.set_ylabel('Vertical Water Speed (cm/s)')
+    ax2.legend(loc='upper left')
+    ax2.xaxis.set_major_formatter(DateFormatter('%d-%b'))
+
+    # Lower right subplot for histogram of vertical water speed
+    ax2_hist = axs[1, 1]
+    ax2_hist.hist(vert_curr, bins=50, orientation='horizontal', alpha=0.5, color='green', label=labels_dict['vert_curr'])
+    ax2_hist.axhline(median_vert_sw_speed, color='red', linestyle='dashed', linewidth=1, label=f'Median: {median_vert_sw_speed:.2f} cm/s')
+    ax2_hist.set_xlabel('Frequency')
+
+    # Determine the best location for the legend based on the y-axis limits and median
+    y_upper_limit = ax2_hist.get_ylim()[1]
+    y_lower_limit = ax2_hist.get_ylim()[0]
+    if abs(y_upper_limit - median_vert_sw_speed) > abs(y_lower_limit - median_vert_sw_speed):
+        legend_loc = 'upper right'
+    else:
+        legend_loc = 'lower right'
+    ax2_hist.legend(loc=legend_loc)
+
+    # Set font sizes for all annotations
+    # Font size 14 looks roughly like fontsize 8 when I drop this figure in Word - a bit small
+    # Font size 14 looks like fontsize 13 when I drop the top *half* of this figure in powerpoint - acceptable
+    def_font_size = 14
+    for ax in [ax1, ax2, ax1_hist, ax2_hist]:
+        ax.yaxis.label.set_size(def_font_size)
+        ax.legend(fontsize=def_font_size)
+        ax.tick_params(axis='both', which='major', labelsize=def_font_size)
+        ax.xaxis.label.set_size(def_font_size)
+
+    # Adjust the axes so that the distance between y-ticks on the top and lower panel is the same
+    # Get the y-axis range of the top left plot
+    y1_range = ax1.get_ylim()[1] - ax1.get_ylim()[0]
+    # Get the y-axis limits of the lower left plot
+    y2_range = ax2.get_ylim()[1] - ax2.get_ylim()[0]
+    # Get the height in inches of the top left plot
+    box1 = ax1.get_position()
+    height1 = box1.height
+    # Get the height in inches of the lower left plot
+    box2 = ax2.get_position()
+    height2 = box2.height
+    # Set a scaled height for the lower left plot
+    new_height = height1 * y2_range / y1_range
+    # Determine the change in height
+    height_change = height2 - new_height
+    # Shift the y-position of the lower left plot by the change in height
+    ax2.set_position([box2.x0, box2.y0 + height_change, box2.width, new_height])
+
+    # Get the position of the lower right plot
+    box2_hist = ax2_hist.get_position()
+    # Adjust the position of the lower right plot to match the height of the lower left plot
+    ax2_hist.set_position([box2_hist.x0, box2_hist.y0 + height_change, box2_hist.width, new_height])
+
+    # Find the distance between the right edge of the top left plot and the left edge of the top right plot
+    box1_hist = ax1_hist.get_position()
+    distance = box1_hist.x0 - (box1.x0 + box1.width)
+    shift_dist = distance / 3  # Not sure this will always work; it may depend on def_font_size
+    # Adjust the width of the top right plot to extend left by a third of the distance
+    ax1_hist.set_position([box1_hist.x0 - shift_dist, box1_hist.y0, box1_hist.width + shift_dist, box1_hist.height])
+    # Adjust the width of the bottom right plot to extend left by a third of the distance
+    box2_hist = ax2_hist.get_position()
+    ax2_hist.set_position([box2_hist.x0 - shift_dist, box2_hist.y0, box2_hist.width + shift_dist, box2_hist.height])
+
+    plt.show()
+
+    return fig, axs
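A sketch of intended use. The three velocity variables are flight-model outputs that must already be present on the dataset; how they are computed is outside this patch:

```python
from glidertest import fetchers, plots

ds = fetchers.load_sample_dataset()  # assumed demo-data helper
# ds must already carry GLIDER_VERT_VELO_MODEL, GLIDER_VERT_VELO_DZDT and
# VERT_CURR_MODEL (flight-model outputs) before calling this diagnostic.
fig, axs = plots.plot_vertical_speeds_with_histograms(ds, start_prof=400, end_prof=420)
```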
+
+def plot_combined_velocity_profiles(ds_out_dives: xr.Dataset, ds_out_climbs: xr.Dataset):
+    """
+    Plots combined vertical velocity profiles for dives and climbs.
+
+    Replicates Fig 3 in Frajka-Williams et al. 2011, but using an updated dataset from Jim Bennett (2013),
+    now in OG1 format as sg014_20040924T182454_delayed.nc. Note that flight model parameters may differ from those in the paper.
+
+    Parameters
+    ----------
+    ds_out_dives (xarray.Dataset): Dataset containing dive profiles with variables 'zgrid', 'meanw', 'w_lower', and 'w_upper'.
+    ds_out_climbs (xarray.Dataset): Dataset containing climb profiles with variables 'zgrid', 'meanw', 'w_lower', and 'w_upper'.
+
+    The function converts vertical velocities from m/s to cm/s, plots the mean vertical velocities and their ranges for both dives and climbs, and customizes the plot with labels, legends, and axis settings.
+
+    Note
+    ----
+    Assumes that the vertical velocities are in m/s and the depth grid is in meters.
+
+    Original author
+    ----------------
+    Eleanor Frajka-Williams
+    """
+    conv_factor = 100  # Convert m/s to cm/s
+    depth_negative = ds_out_dives.zgrid.values * -1
+    meanw_dives = ds_out_dives.meanw.values * conv_factor
+    zgrid_dives = depth_negative
+    w_lower_dives = ds_out_dives.w_lower.values * conv_factor
+    w_upper_dives = ds_out_dives.w_upper.values * conv_factor
+
+    meanw_climbs = ds_out_climbs.meanw.values * conv_factor
+    zgrid_climbs = ds_out_climbs.zgrid.values * -1
+    w_lower_climbs = ds_out_climbs.w_lower.values * conv_factor
+    w_upper_climbs = ds_out_climbs.w_upper.values * conv_factor
+
+    fig, ax = plt.subplots(1, 1, figsize=(4.8, 4.8))
+
+    ax.xaxis.label.set_size(14)
+    ax.yaxis.label.set_size(14)
+    ax.tick_params(axis='both', which='major', labelsize=14)
+
+    # Plot dives
+    ax.fill_betweenx(zgrid_dives, w_lower_dives, w_upper_dives, color='black', alpha=0.3)
+    ax.plot(meanw_dives, zgrid_dives, color='black', label='w$_{dive}$')
+
+    # Plot climbs
+    ax.fill_betweenx(zgrid_climbs, w_lower_climbs, w_upper_climbs, color='red', alpha=0.3)
+    ax.plot(meanw_climbs, zgrid_climbs, color='red', label='w$_{climb}$')
+
+    ax.invert_yaxis()  # Invert y-axis to show depth increasing downwards
+    ax.axvline(x=0, color='darkgray', linestyle='-', linewidth=0.5)  # Add vertical line through 0
+    ax.set_xlabel('Vertical Velocity w$_{sw}$ (cm s$^{-1}$)')
+    ax.set_ylabel('Depth (m)')
+    ax.set_ylim(top=0, bottom=1000)  # Set y-limit maximum to zero
+    ax.legend(fontsize=14)
+    # ax.set_title('Combined Vertical Velocity Profiles')
+
+    ax.set_xlim(-1, 1.5)
+    ax.set_xticks([-1, -0.5, 0, 0.5, 1.0, 1.5])
+    plt.tight_layout()
+    ax.spines['right'].set_visible(False)
+    ax.spines['top'].set_visible(False)
+    plt.show()
+    ax.tick_params(axis='both', which='major', labelsize=12)

diff --git a/glidertest/tools.py b/glidertest/tools.py
index 83539d4..6792585 100644
--- a/glidertest/tools.py
+++ b/glidertest/tools.py
@@ -14,80 +14,7 @@
 import cartopy.crs as ccrs
 import cartopy.feature as cfeature
 import warnings
-
-
-def _check_necessary_variables(ds: xr.Dataset, vars: list):
-    """
-    Checks that all of a list of variables are present in a dataset
-
-    Parameters
-    ----------
-    ds (xarray.Dataset): _description_
-    vars (list): _description_
-
-    Raises
-    ----------
-    KeyError: Raises an error if all vars not present in ds
-
-    Original author
-    ----------------
-    Callum Rollo
-    """
-    missing_vars = set(vars).difference(set(ds.variables))
-    if missing_vars:
-        msg = f"Required variables {list(missing_vars)} do not exist in the supplied dataset."
-        raise KeyError(msg)
-
-
-def _calc_teos10_variables(ds):
-    """
-    Calculates TEOS 10 variables not present in the dataset
-    :param ds:
-    :return:
-    """
-    _check_necessary_variables(ds, ['DEPTH', 'LONGITUDE', 'LATITUDE', 'TEMP', 'PSAL'])
-    if 'DENSITY' not in ds.variables:
-        SA = gsw.SA_from_SP(ds.PSAL, ds.DEPTH, ds.LONGITUDE, ds.LATITUDE)
-        CT = gsw.CT_from_t(SA, ds.TEMP, ds.DEPTH)
-        ds['DENSITY'] = ('N_MEASUREMENTS', gsw.rho(SA, CT, ds.DEPTH).values)
-    return ds
-
-
-def construct_2dgrid(x, y, v, xi=1, yi=1):
-    """
-    Function to grid data
-
-    Parameters
-    ----------
-    x: data with data for the x dimension
-    y: data with data for the y dimension
-    v: data with data for the z dimension
-    xi: Horizontal resolution for the gridding
-    yi: Vertical resolution for the gridding
-
-    Returns
-    -------
-    grid: z data gridded in x and y space with xi and yi resolution
-    XI: x data gridded in x and y space with xi and yi resolution
-    YI: y data gridded in x and y space with xi and yi resolution
-
-    Original author
-    ----------------
-    Bastien Queste (https://github.com/bastienqueste/gliderad2cp/blob/de0652f70f4768c228f83480fa7d1d71c00f9449/gliderad2cp/process_adcp.py#L140)
-
-    """
-    if np.size(xi) == 1:
-        xi = np.arange(np.nanmin(x), np.nanmax(x) + xi, xi)
-    if np.size(yi) == 1:
-        yi = np.arange(np.nanmin(y), np.nanmax(y) + yi, yi)
-    raw = pd.DataFrame({'x': x, 'y': y, 'v': v}).dropna()
-    grid = np.full([np.size(xi), np.size(yi)], np.nan)
-    raw['xbins'], xbin_iter = pd.cut(raw.x, xi, retbins=True, labels=False)
-    raw['ybins'], ybin_iter = pd.cut(raw.y, yi, retbins=True, labels=False)
-    _tmp = raw.groupby(['xbins', 'ybins'])['v'].agg('median')
-    grid[_tmp.index.get_level_values(0).astype(int), _tmp.index.get_level_values(1).astype(int)] = _tmp.values
-    YI, XI = np.meshgrid(yi, xi)
-    return grid, XI, YI
+from glidertest import utilities
 
 
 def quant_updown_bias(ds, var='PSAL', v_res=1):
@@ -109,12 +36,12 @@
     ----------------
     Chiara Monforte
     """
-    _check_necessary_variables(ds, ['PROFILE_NUMBER', 'DEPTH', var])
+    utilities._check_necessary_variables(ds, ['PROFILE_NUMBER', 'DEPTH', var])
     p = 1  # Horizontal resolution
     z = v_res  # Vertical resolution
 
     if var in ds.variables:
-        varG, profG, depthG = construct_2dgrid(ds.PROFILE_NUMBER, ds.DEPTH, ds[var], p, z)
+        varG, profG, depthG = utilities.construct_2dgrid(ds.PROFILE_NUMBER, ds.DEPTH, ds[var], p, z)
 
         grad = np.diff(varG, axis=0)  # Horizontal gradients
     with warnings.catch_warnings():
@@ -129,354 +56,6 @@
     return df
 
 
-def plot_updown_bias(df: pd.DataFrame, ax: plt.Axes = None, xlabel='Temperature [C]', **kw: dict, ) -> tuple({plt.Figure, plt.Axes}):
-    """
-    This function can be used to plot the up and downcast differences computed with the updown_bias function
-
-    Parameters
-    ----------
-    df: pandas dataframe containing dc (Dive - Climb average), cd (Climb - Dive average) and depth
-    ax: axis to plot the data
-    xlabel: label for the x-axis
-
-    Returns
-    -------
-    A line plot comparing the day and night average over depth for the selected day
-
-    Original author
-    ----------------
-    Chiara Monforte
-    """
-    if ax is None:
-        fig, ax = plt.subplots(figsize=(5, 5))
-    else:
-        fig = plt.gcf()
-
-    if not all(hasattr(df, attr) for attr in ['dc', 'depth']):
-        ax.text(0.5, 0.55, xlabel, va='center', ha='center', transform=ax.transAxes, bbox=dict(facecolor='white', alpha=0.5, edgecolor='none'))
-        ax.text(0.5, 0.45, 'data unavailable', va='center', ha='center', transform=ax.transAxes, bbox=dict(facecolor='white', alpha=0.5, edgecolor='none'))
-    else:
-        ax.plot(df.dc, df.depth, label='Dive-Climb')
-        ax.plot(df.cd, df.depth, label='Climb-Dive')
-        ax.legend(loc=3)
-        lims = np.abs(df.dc)
-        ax.set_xlim(-np.nanpercentile(lims, 99.5), np.nanpercentile(lims, 99.5))
-        ax.set_ylim(df.depth.max() + 1, -df.depth.max() / 30)
-    ax.set_xlabel(xlabel)
-    ax.grid()
-    return fig, ax
-
-
-def compute_cline(var, depth_array):
-    """
-    Find the depth of the maximum vertical difference for a specified variables
-
-    Parameters
-    ----------
-    var: 2D array containing data from a selected variable gridded over time/profile/distance etc. and depth (y-axis))
-    depth_array: 2D array containing pressure/depth data gridded over time/profile/distance etc. and depth (y-axis))
-
-    Returns
-    -------
-    1D array containing the depth of the cline at each timestamp/profile/distance etc.
-    Original author
-    ----------------
-    Chiara Monforte
-    """
-    with warnings.catch_warnings():
-        warnings.simplefilter("ignore", category=RuntimeWarning)
-        clin = np.where(np.abs(np.diff(np.nanmean(var, axis=0))) == np.nanmax(np.abs(np.diff(np.nanmean(var, axis=0)))))
-    return np.round(depth_array[0, clin[0]], 1)
-
-
-def plot_basic_vars(ds: xr.Dataset, v_res=1, start_prof=0, end_prof=-1):
-    """
-    This function plots the basic oceanographic variables temperature, salinity and density. A second plot is created and filled with oxygen and
-    chlorophyll data if available.
-
-    Parameters
-    ----------
-    ds: xarray in OG1 format containing at least temperature, salinity and density and depth
-    v_res: vertical resolution for the gridding. Horizontal resolution (by profile) is assumed to be 1
-    start_prof: start profile used to compute the means that will be plotted. This can vary in case the dataset spread over a large timescale
-                or region and subsections want to be plotted-1
-    end_prof: end profile used to compute the means that will be plotted. This can vary in case the dataset spread over a large timescale
-              or region and subsections want to be plotted-1
-
-    Returns
-    -------
-    Line plots for the averages of the different variables.
-    Thermo, halo and pycnocline are computed and plotted. A sentence stating the depth of the clines is printed too
-
-    Original author
-    ----------------
-    Chiara Monforte
-    """
-    _check_necessary_variables(ds, ['PROFILE_NUMBER', 'DEPTH', 'TEMP', 'PSAL', 'LATITUDE', 'LONGITUDE'])
-    ds = _calc_teos10_variables(ds)
-    p = 1
-    z = v_res
-    tempG, profG, depthG = construct_2dgrid(ds.PROFILE_NUMBER, ds.DEPTH, ds.TEMP, p, z)
-    salG, profG, depthG = construct_2dgrid(ds.PROFILE_NUMBER, ds.DEPTH, ds.PSAL, p, z)
-    denG, profG, depthG = construct_2dgrid(ds.PROFILE_NUMBER, ds.DEPTH, ds.DENSITY, p, z)
-
-
-    tempG = tempG[start_prof:end_prof, :]
-    salG = salG[start_prof:end_prof, :]
-    denG = denG[start_prof:end_prof, :]
-    depthG = depthG[start_prof:end_prof, :]
-
-    halo = compute_cline(salG, depthG)
-    thermo = compute_cline(tempG, depthG)
-    pycno = compute_cline(denG, depthG)
-    print(
-        f'The thermocline, halocline and pycnocline are located at respectively {thermo}, {halo} and {pycno}m as shown in the plots as well')
-    with warnings.catch_warnings():
-        warnings.simplefilter("ignore", category=RuntimeWarning)
-        fig, ax = plt.subplots(1, 2, figsize=(15, 5))
-        ax1 = ax[0].twiny()
-        ax2 = ax[0].twiny()
-        ax2.spines["top"].set_position(("axes", 1.2))
-        ax[0].plot(np.nanmean(tempG, axis=0), depthG[0, :], c='blue')
-        ax1.plot(np.nanmean(salG, axis=0), depthG[0, :], c='red')
-        ax2.plot(np.nanmean(denG, axis=0), depthG[0, :], c='black')
-        ax[0].axhline(thermo, linestyle='dashed', c='blue')
-        ax1.axhline(halo, linestyle='dashed', c='red')
-        ax2.axhline(pycno, linestyle='dashed', c='black')
-
-        ax[0].set(xlabel=f'Average Temperature [C] \nbetween profile {start_prof} and {end_prof}', ylabel='Depth (m)')
-        ax[0].tick_params(axis='x', colors='blue')
-        ax[0].xaxis.label.set_color('blue')
-        ax1.spines['bottom'].set_color('blue')
-        ax1.set(xlabel=f'Average Salinity [PSU] \nbetween profile {start_prof} and {end_prof}')
-        ax1.xaxis.label.set_color('red')
-        ax1.spines['top'].set_color('red')
-        ax1.tick_params(axis='x', colors='red')
-        ax2.spines['bottom'].set_color('black')
-        ax2.set(xlabel=f'Average Density [kg m-3] \nbetween profile {start_prof} and {end_prof}')
-        ax2.xaxis.label.set_color('black')
-        ax2.spines['top'].set_color('black')
-        ax2.tick_params(axis='x', colors='black')
-
-        if 'CHLA' in ds.variables:
-            chlaG, profG, depthG = construct_2dgrid(ds.PROFILE_NUMBER, ds.DEPTH, ds.CHLA, p, z)
-            chlaG = chlaG[start_prof:end_prof, :]
-            ax2_1 = ax[1].twiny()
-            ax2_1.plot(np.nanmean(chlaG, axis=0), depthG[0, :], c='green')
-            ax2_1.set(xlabel=f'Average Chlorophyll-a [mg m-3] \nbetween profile {start_prof} and {end_prof}')
-            ax2_1.xaxis.label.set_color('green')
-            ax2_1.spines['top'].set_color('green')
-            ax2_1.tick_params(axis='x', colors='green')
-        else:
-            ax[1].text(0.3, 0.7, 'Chlorophyll data unavailable', va='top', transform=ax[1].transAxes)
-
-        if 'DOXY' in ds.variables:
-            oxyG, profG, depthG = construct_2dgrid(ds.PROFILE_NUMBER, ds.DEPTH, ds.DOXY, p, z)
-            oxyG = oxyG[start_prof:end_prof, :]
-            ax[1].plot(np.nanmean(oxyG, axis=0), depthG[0, :], c='orange')
-            ax[1].set(xlabel=f'Average Oxygen [mmol m-3] \nbetween profile {start_prof} and {end_prof}')
-            ax[1].xaxis.label.set_color('orange')
-            ax[1].spines['top'].set_color('orange')
-            ax[1].tick_params(axis='x', colors='orange')
-            ax[1].spines['bottom'].set_color('orange')
-        else:
-            ax[1].text(0.3, 0.5, 'Oxygen data unavailable', va='top', transform=ax[1].transAxes)
-
-        [a.set_ylim(depthG.max() + 10, -5) for a in ax]
-        [a.grid() for a in ax]
-    return fig, ax
-
-
-def process_optics_assess(ds, var='CHLA'):
-    """
-    Function to assess visually any drift in deep optics data and the presence of any possible negative data. This function returns both plots and text
-
-    Parameters
-    ----------
-    ds: xarray dataset in OG1 format containing at least time, depth and the selected optical variable
-    var: name of the selected variable
-
-    Returns
-    -------
-    Text giving info on where and when negative data was observed
-    Plot showing bottom data with a linear regression line to highlight any drift
-
-    Original author
-    ----------------
-    Chiara Monforte
-    """
-    _check_necessary_variables(ds, [var, 'TIME', 'DEPTH'])
-    # Check how much negative data there is
-    neg_chl = np.round((len(np.where(ds[var] < 0)[0]) * 100) / len(ds[var]), 1)
-    if neg_chl > 0:
-        print(f'{neg_chl}% of scaled {var} data is negative, consider recalibrating data')
-        # Check where the negative values occur and if we just see them at specific time of the mission or not
-        start = ds.TIME[np.where(ds[var] < 0)][0]
-        end = ds.TIME[np.where(ds[var] < 0)][-1]
-        min_z = ds.DEPTH[np.where(ds[var] < 0)].min().values
-        max_z = ds.DEPTH[np.where(ds[var] < 0)].max().values
-        print(f'Negative data in present from {str(start.values)[:16]} to {str(end.values)[:16]}')
-        print(f'Negative data is present between {"%.1f" % np.round(min_z, 1)} and {"%.1f" % np.round(max_z, 1)} ')
-    else:
-        print(f'There is no negative scaled {var} data, recalibration and further checks are still recommended ')
-    # Check if there is any missing data throughout the mission
-    if len(ds.TIME) != len(ds[var].dropna(dim='N_MEASUREMENTS').TIME):
-        print(f'{var} data is missing for part of the mission')  # Add to specify where the gaps are
-    else:
-        print(f'{var} data is present for the entire mission duration')
-    # Check bottom dark count and any drift there
-    bottom_opt_data = ds[var].where(ds[var].DEPTH > ds.DEPTH.max() - (ds.DEPTH.max() * 0.1)).dropna(
-        dim='N_MEASUREMENTS')
-    slope, intercept, r_value, p_value, std_err = stats.linregress(np.arange(0, len(bottom_opt_data)), bottom_opt_data)
-    ax = sns.regplot(data=ds, x=np.arange(0, len(bottom_opt_data)), y=bottom_opt_data,
-                     scatter_kws={"color": "grey"},
-                     line_kws={"color": "red", "label": "y={0:.8f}x+{1:.5f}".format(slope, intercept)},
-                     )
-    ax.legend(loc=2)
-    ax.grid()
-    ax.set(ylim=(np.nanpercentile(bottom_opt_data, 0.5), np.nanpercentile(bottom_opt_data, 99.5)),
-           xlabel='Measurements',
-           ylabel=var)
-    percentage_change = (((slope * len(bottom_opt_data) + intercept) - intercept) / abs(intercept)) * 100
-
-    if abs(percentage_change) >= 1:
-        print(
-            'Data from the deepest 10% of data has been analysed and data does not seem perfectly stable. An alternative solution for dark counts has to be considered. \nMoreover, it is recommended to check the sensor has this may suggest issues with the sensor (i.e water inside the sensor, temporal drift etc)')
-        print(
-            f'Data changed (increased or decreased) by {"%.1f" % np.round(percentage_change, 1)}% from the beginning to the end of the mission')
-    else:
-        print(
-            f'Data from the deepest 10% of data has been analysed and data seems stable. These deep values can be used to re-assess the dark count if the no {var} at depth assumption is valid in this site and this depth')
-    return ax
-
-
-def compute_sunset_sunrise(time, lat, lon):
-    """
-    Calculates the local sunrise/sunset of the glider location from GliderTools.
-
-    The function uses the Skyfield package to calculate the sunrise and sunset
-    times using the date, latitude and longitude. The times are returned
The times are returned - rather than day or night indices, as it is more flexible for the quenching - correction. - - - Parameters - ---------- - time: numpy.ndarray or pandas.Series - The date & time array in a numpy.datetime64 format. - lat: numpy.ndarray or pandas.Series - The latitude of the glider position. - lon: numpy.ndarray or pandas.Series - The longitude of the glider position. - - Returns - ------- - sunrise: numpy.ndarray - An array of the sunrise times. - sunset: numpy.ndarray - An array of the sunset times. - - Original author - ---------------- - Function from GliderTools (https://github.com/GliderToolsCommunity/GliderTools/blob/master/glidertools/optics.py) - - """ - - ts = api.load.timescale() - eph = api.load("de421.bsp") - - df = DataFrame.from_dict(dict([("time", time), ("lat", lat), ("lon", lon)])) - - # set days as index - df = df.set_index(df.time.values.astype("datetime64[D]")) - - # groupby days and find sunrise/sunset for unique days - grp_avg = df.groupby(df.index).mean(numeric_only=False) - date = grp_avg.index - - time_utc = ts.utc(date.year, date.month, date.day, date.hour) - time_utc_offset = ts.utc( - date.year, date.month, date.day + 1, date.hour - ) # add one day for each unique day to compute sunrise and sunset pairs - - bluffton = [] - for i in range(len(grp_avg.lat)): - bluffton.append(api.wgs84.latlon(grp_avg.lat.iloc[i], grp_avg.lon.iloc[i])) - bluffton = np.array(bluffton) - - sunrise = [] - sunset = [] - for n in range(len(bluffton)): - - f = almanac.sunrise_sunset(eph, bluffton[n]) - t, y = almanac.find_discrete(time_utc[n], time_utc_offset[n], f) - - if not t: - if f(time_utc[n]): # polar day - sunrise.append( - pd.Timestamp( - date[n].year, date[n].month, date[n].day, 0, 1 - ).to_datetime64() - ) - sunset.append( - pd.Timestamp( - date[n].year, date[n].month, date[n].day, 23, 59 - ).to_datetime64() - ) - else: # polar night - sunrise.append( - pd.Timestamp( - date[n].year, date[n].month, date[n].day, 11, 59 - ).to_datetime64() - ) - sunset.append( - pd.Timestamp( - date[n].year, date[n].month, date[n].day, 12, 1 - ).to_datetime64() - ) - - else: - sr = t[y == 1] # y=1 sunrise - sn = t[y == 0] # y=0 sunset - - sunup = pd.to_datetime(sr.utc_iso()).tz_localize(None) - sundown = pd.to_datetime(sn.utc_iso()).tz_localize(None) - - # this doesn't look very efficient at the moment, but I was having issues with getting the datetime64 - # to be compatible with the above code to handle polar day and polar night - - su = pd.Timestamp( - sunup.year[0], - sunup.month[0], - sunup.day[0], - sunup.hour[0], - sunup.minute[0], - ).to_datetime64() - - sd = pd.Timestamp( - sundown.year[0], - sundown.month[0], - sundown.day[0], - sundown.hour[0], - sundown.minute[0], - ).to_datetime64() - - sunrise.append(su) - sunset.append(sd) - - sunrise = np.array(sunrise).squeeze() - sunset = np.array(sunset).squeeze() - - grp_avg["sunrise"] = sunrise - grp_avg["sunset"] = sunset - - # reindex days to original dataframe as night - df_reidx = grp_avg.reindex(df.index) - sunrise, sunset = df_reidx[["sunrise", "sunset"]].values.T - - return sunrise, sunset - - def compute_daynight_avg(ds, sel_var='CHLA', start_time=None, end_time=None, start_prof=None, end_prof=None): """ This function computes night and day averages for a selected variable over a specific period of time or a specific series of dives @@ -517,7 +96,7 @@ def compute_daynight_avg(ds, sel_var='CHLA', start_time=None, end_time=None, sta Chiara Monforte """ - _check_necessary_variables(ds, ['TIME', sel_var, 'DEPTH']) + 
utilities._check_necessary_variables(ds, ['TIME', sel_var, 'DEPTH']) if "TIME" not in ds.indexes.keys(): ds = ds.set_xindex('TIME') @@ -532,7 +111,7 @@ def compute_daynight_avg(ds, sel_var='CHLA', start_time=None, end_time=None, sta ds_sel = ds.sel(TIME=slice(t1,t2)) else: ds_sel = ds.sel(TIME=slice(start_time, end_time)) - sunrise, sunset = compute_sunset_sunrise(ds_sel.TIME, ds_sel.LATITUDE, ds_sel.LONGITUDE) + sunrise, sunset = utilities.compute_sunset_sunrise(ds_sel.TIME, ds_sel.LATITUDE, ds_sel.LONGITUDE) # creating batches where one batch is a night and the following day day = (ds_sel.TIME > sunrise) & (ds_sel.TIME < sunset) @@ -556,153 +135,6 @@ def compute_daynight_avg(ds, sel_var='CHLA', start_time=None, end_time=None, sta return day_av, night_av -def plot_daynight_avg(day: pd.DataFrame, night: pd.DataFrame, ax: plt.Axes = None, sel_day=None, - xlabel='Chlorophyll [mg m-3]', **kw: dict, ) -> tuple({plt.Figure, plt.Axes}): - """ - This function can be used to plot the day and night averages computed with the day_night_avg function - - Parameters - ---------- - day: pandas dataframe containing the day averages - night: pandas dataframe containing the night averages - ax: axis to plot the data - sel_day: selected day to plot. Defaults to the median day - xlabel: label for the x-axis - - Returns - ------- - A line plot comparing the day and night average over depth for the selected day - - Original author - ---------------- - Chiara Monforte - - """ - if not sel_day: - dates = list(day.date.dropna().values) + list(night.date.dropna().values) - dates.sort() - sel_day = dates[int(len(dates)/2)] - if ax is None: - fig, ax = plt.subplots(figsize=(5, 5)) - else: - fig = plt.gcf() - ax.plot(night.where(night.date == sel_day).dropna().dat, night.where(night.date == sel_day).dropna().depth, - label='Night time average') - ax.plot(day.where(day.date == sel_day).dropna().dat, day.where(day.date == sel_day).dropna().depth, - label='Daytime average') - ax.legend() - ax.invert_yaxis() - ax.grid() - ax.set(xlabel=xlabel, ylabel='Depth [m]') - ax.set_title(sel_day) - return fig, ax - - -def plot_quench_assess(ds: xr.Dataset, sel_var: str, ax: plt.Axes = None, start_time=None, - end_time=None,start_prof=None, end_prof=None, ylim=45, **kw: dict, ) -> tuple({plt.Figure, plt.Axes}): - """ - This function can be used to plot sections for any variable with the sunrise and sunset plotted over - - Parameters - ---------- - ds: xarray on OG1 format containing at least time, depth, latitude, longitude and the selected variable. - Data should not be gridded. - sel_var: selected variable to plot - ax: axis to plot the data - start_time: Start date of the data selection format 'YYYY-MM-DD'. As missions can be long and came make it hard to visualise NPQ effect. - Defaults to mid 4 days - end_time: End date of the data selection format 'YYYY-MM-DD'. As missions can be long and came make it hard to visualise NPQ effect. - Defaults to mid 4 days - start_prof: Start profile of the data selection. If no profile is specified, the specified time selection will be used or the mid 4 days of the deployment - end_prof: End profile of the data selection. If no profile is specified, the specified time selection will be used or the mid 4 days of the deployment - ylim: specified limit for the maximum y-axis value. 
The minimum is computed as ylim/30 - - Returns - ------- - A section showing the variability of the selected data over time and depth - - Original author - ---------------- - Chiara Monforte - """ - _check_necessary_variables(ds, ['TIME', sel_var, 'DEPTH']) - if ax is None: - fig, ax = plt.subplots(figsize=(5, 5)) - else: - fig = plt.gcf() - - if "TIME" not in ds.indexes.keys(): - ds = ds.set_xindex('TIME') - - if not start_time: - start_time = ds.TIME.mean() - np.timedelta64(2, 'D') - if not end_time: - end_time = ds.TIME.mean() + np.timedelta64(2, 'D') - - if start_prof and end_prof: - t1 = ds.TIME.where(ds.PROFILE_NUMBER==start_prof).dropna(dim='N_MEASUREMENTS')[0] - t2 = ds.TIME.where(ds.PROFILE_NUMBER==end_prof).dropna(dim='N_MEASUREMENTS')[-1] - ds_sel = ds.sel(TIME=slice(t1,t2)) - else: - ds_sel = ds.sel(TIME=slice(start_time, end_time)) - - if len(ds_sel.TIME) == 0: - msg = f"supplied limits start_time: {start_time} end_time: {end_time} do not overlap with dataset TIME range {str(ds.TIME.values.min())[:10]} - {str(ds.TIME.values.max())[:10]}" - raise ValueError(msg) - - sunrise, sunset = compute_sunset_sunrise(ds_sel.TIME, ds_sel.LATITUDE, ds_sel.LONGITUDE) - - c = ax.scatter(ds_sel.TIME, ds_sel.DEPTH, c=ds_sel[sel_var], s=10, vmin=np.nanpercentile(ds_sel[sel_var], 0.5), - vmax=np.nanpercentile(ds_sel[sel_var], 99.5)) - ax.set_ylim(ylim, -ylim / 30) - for n in np.unique(sunset): - ax.axvline(np.unique(n), c='blue') - for m in np.unique(sunrise): - ax.axvline(np.unique(m), c='orange') - ax.set_ylabel('Depth [m]') - plt.colorbar(c, label=f'{sel_var} [{ds[sel_var].units}]') - return fig, ax - - -def check_temporal_drift(ds: xr.Dataset, var: str, ax: plt.Axes = None, **kw: dict, ) -> tuple({plt.Figure, plt.Axes}): - """ - This function can be used to plot sections for any variable with the sunrise and sunset plotted over - - Parameters - ---------- - ds: xarray on OG1 format containing at least time, depth, latitude, longitude and the selected variable. - Data should not be gridded. - var: selected variable to plot - ax: axis to plot the data - - Returns - ------- - A figure with two subplots. One is a section containing the data over time and depth. The other one is a scatter of data from the variable - over depth and colored by date - - Original author - ---------------- - Chiara Monforte - """ - _check_necessary_variables(ds, ['TIME', var, 'DEPTH']) - if ax is None: - fig, ax = plt.subplots(1, 2, figsize=(14, 6)) - else: - fig = plt.gcf() - - ax[0].scatter(mdates.date2num(ds.TIME), ds[var], s=10) - ax[0].xaxis.set_major_formatter(DateFormatter('%Y-%m-%d')) - ax[0].set(ylim=(np.nanpercentile(ds[var], 0.01), np.nanpercentile(ds[var], 99.99)), ylabel=var) - - c = ax[1].scatter(ds[var], ds.DEPTH, c=mdates.date2num(ds.TIME), s=10) - ax[1].set(xlim=(np.nanpercentile(ds[var], 0.01), np.nanpercentile(ds[var], 99.99)), ylabel='Depth (m)', xlabel=var) - ax[1].invert_yaxis() - - [a.grid() for a in ax] - plt.colorbar(c, format=DateFormatter('%b %d')) - return fig, ax - - def check_monotony(da): """ This function check weather the selected variable over the mission is monotonically increasing or not. This is developed in particular for profile number. 
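As a quick illustration of the monotony check that stays in glidertest.tools, the sketch below exercises the same core test on a synthetic profile-number series; the data values and names here are illustrative only and not part of this patch.

import numpy as np
import xarray as xr

# Hypothetical profile-number series; the 2 -> 1 step breaks monotony.
profile_number = xr.DataArray(np.array([0, 0, 1, 1, 2, 2, 1, 3]),
                              dims='N_MEASUREMENTS', name='PROFILE_NUMBER')
# check_monotony essentially tests that consecutive differences are >= 0 and
# reports the result; a failure flags a mis-assigned profile index.
is_monotonic = bool(np.all(np.diff(profile_number.values) >= 0))
print(f'{profile_number.name} monotonically increasing: {is_monotonic}')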
@@ -728,332 +160,6 @@ def check_monotony(da): print(f'{da.name} is always monotonically increasing') return True - -def plot_prof_monotony(ds: xr.Dataset, ax: plt.Axes = None, **kw: dict, ) -> tuple({plt.Figure, plt.Axes}): - - """ - This function can be used to plot the profile number and check for any possible issues with the profile index assigned. - - Parameters - ---------- - ds: xarray dataset in OG1 format with at least PROFILE_NUMBER, TIME, DEPTH. Data should not be gridded - ax: axis to plot the data - - Returns - ------- - Two plots, one line plot with the profile number over time (expected to be always increasing). A - second plot which is a scatter plot showing at which depth over time there was a profile index where the - difference was neither 0 nor 1 (meaning there are possibly issues with how the profile index was assigned). - - Original author - ---------------- - Chiara Monforte - - """ - _check_necessary_variables(ds, ['TIME', 'PROFILE_NUMBER', 'DEPTH']) - if ax is None: - fig, ax = plt.subplots(2, 1, figsize=(10, 5), sharex=True) - else: - fig = plt.gcf() - - ax[0].plot(ds.TIME, ds.PROFILE_NUMBER) - ax[0].set(ylabel='Profile_Number') - if len(np.where((np.diff(ds.PROFILE_NUMBER) != 0) & (np.diff(ds.PROFILE_NUMBER) != 1))[0]) == 0: - ax[1].text(0.2, 0.5, 'Data in monotonically increasing and no issues can be observed', - transform=ax[1].transAxes) - else: - ax[1].scatter(ds.TIME[np.where((np.diff(ds.PROFILE_NUMBER) != 0) & (np.diff(ds.PROFILE_NUMBER) != 1))], - ds.DEPTH[np.where((np.diff(ds.PROFILE_NUMBER) != 0) & (np.diff(ds.PROFILE_NUMBER) != 1))], - s=10, c='red', label='Depth at which we have issues \n with the profile number assigned') - ax[1].legend() - ax[1].set(ylabel='Depth') - ax[1].invert_yaxis() - ax[1].xaxis.set_major_locator(plt.MaxNLocator(8)) - [a.grid() for a in ax] - return fig, ax - - -def plot_glider_track(ds: xr.Dataset, ax: plt.Axes = None, **kw: dict) -> tuple({plt.Figure, plt.Axes}): - """ - This function plots the glider track on a map, with latitude and longitude colored by time. - - Parameters - ---------- - ds: xarray in OG1 format with at least TIME, LATITUDE, and LONGITUDE. - ax: Optional; axis to plot the data. - kw: Optional; additional keyword arguments for the scatter plot. - - Returns - ------- - One plot with the map of the glider track. 
- fig: matplotlib.figure.Figure - ax: matplotlib.axes._subplots.AxesSubplot - - Original author - ---------------- - Eleanor Frajka-Williams - """ - _check_necessary_variables(ds, ['TIME', 'LONGITUDE', 'LATITUDE']) - if ax is None: - fig, ax = plt.subplots(figsize=(10, 6), subplot_kw={'projection': ccrs.PlateCarree()}) - else: - fig = plt.gcf() - - latitudes = ds.LATITUDE.values - longitudes = ds.LONGITUDE.values - times = ds.TIME.values - - # Reduce the number of latitudes, longitudes, and times to no more than 2000 - if len(latitudes) > 2000: - indices = np.linspace(0, len(latitudes) - 1, 2000).astype(int) - latitudes = latitudes[indices] - longitudes = longitudes[indices] - times = times[indices] - - # Convert time to the desired format - time_labels = [pd.to_datetime(t).strftime('%Y-%b-%d') for t in times] - - # Plot latitude and longitude colored by time - sc = ax.scatter(longitudes, latitudes, c=times, cmap='viridis', **kw) - - # Add colorbar with formatted time labels - cbar = plt.colorbar(sc, ax=ax) #, label='Time') - cbar.ax.set_yticklabels([pd.to_datetime(t).strftime('%Y-%b-%d') for t in cbar.get_ticks()]) - - ax.set_extent([np.min(longitudes)-1, np.max(longitudes)+1, np.min(latitudes)-1, np.max(latitudes)+1], crs=ccrs.PlateCarree()) - - # Add features to the map - ax.add_feature(cfeature.LAND) - ax.add_feature(cfeature.OCEAN) - ax.add_feature(cfeature.COASTLINE) - - ax.set_xlabel('Longitude') - ax.set_ylabel('Latitude') - ax.set_title('Glider Track') - gl = ax.gridlines(draw_labels=True, color='black', alpha=0.5, linestyle='--') - gl.top_labels = False - gl.right_labels = False - plt.show() - - return fig, ax - -def plot_grid_spacing(ds: xr.Dataset, ax: plt.Axes = None, **kw: dict) -> tuple({plt.Figure, plt.Axes}): - """ - This function plots histograms of the grid spacing (diff(ds.DEPTH) and diff(ds.TIME)) where only the inner 99% of values are plotted. - - Parameters - ---------- - ds: xarray in OG1 format with at least TIME and DEPTH. - ax: Optional; axis to plot the data. - kw: Optional; additional keyword arguments for the histograms. - - Returns - ------- - Two histograms showing the distribution of grid spacing for depth and time. 
- fig: matplotlib.figure.Figure - ax: matplotlib.axes._subplots.AxesSubplot - - Original author - ---------------- - Eleanor Frajka-Williams - """ - _check_necessary_variables(ds, ['TIME', 'DEPTH']) - if ax is None: - fig, ax = plt.subplots(1, 2, figsize=(14, 6)) - else: - fig = plt.gcf() - # Set font sizes for all annotations - def_font_size = 14 - - # Calculate the depth and time differences - depth_diff = np.diff(ds.DEPTH) - orig_time_diff = np.diff(ds.TIME) / np.timedelta64(1, 's') # Convert to seconds - - # Remove NaN values - depth_diff = depth_diff[np.isfinite(depth_diff)] - time_diff = orig_time_diff[np.isfinite(orig_time_diff)] - - # Calculate some statistics (using original data) - median_neg_depth_diff = np.median(depth_diff[depth_diff < 0]) - median_pos_depth_diff = np.median(depth_diff[depth_diff > 0]) - max_depth_diff = np.max(depth_diff) - min_depth_diff = np.min(depth_diff) - - median_time_diff = np.median(orig_time_diff) - mean_time_diff = np.mean(orig_time_diff) - max_time_diff = np.max(orig_time_diff) - min_time_diff = np.min(orig_time_diff) - max_time_diff_hrs = max_time_diff/3600 - - # Remove the top and bottom 0.5% of values to get a better histogram - # This is hiding some data from the user - depth_diff = depth_diff[(depth_diff >= np.nanpercentile(depth_diff, 0.5)) & (depth_diff <= np.nanpercentile(depth_diff, 99.5))] - time_diff = time_diff[(time_diff >= np.nanpercentile(time_diff, 0.5)) & (time_diff <= np.nanpercentile(time_diff, 99.5))] - print('Depth and time differences have been filtered to the middle 99% of values.') - print('Numeric median/mean/max/min values are based on the original data.') - - # Histogram of depth spacing - ax[0].hist(depth_diff, bins=50, **kw) - ax[0].set_xlabel('Depth Spacing (m)') - ax[0].set_ylabel('Frequency') - ax[0].set_title('Histogram of Depth Spacing') - - annotation_text_left = ( - f'Median Negative: {median_neg_depth_diff:.2f} m\n' - f'Median Positive: {median_pos_depth_diff:.2f} m\n' - f'Max: {max_depth_diff:.2f} m\n' - f'Min: {min_depth_diff:.2f} m' - ) - # Determine the best location for the annotation based on the x-axis limits - x_upper_limit = ax[0].get_xlim()[1] - x_lower_limit = ax[0].get_xlim()[0] - if abs(x_lower_limit) > abs(x_upper_limit): - annotation_loc = (0.04, 0.96) # Top left - ha = 'left' - else: - annotation_loc = (0.96, 0.96) # Top right - ha = 'right' - ax[0].annotate(annotation_text_left, xy=annotation_loc, xycoords='axes fraction', - fontsize=def_font_size, ha=ha, va='top', - bbox=dict(boxstyle='round,pad=0.3', edgecolor='black', facecolor='white', alpha=.5)) - - # Histogram of time spacing - ax[1].hist(time_diff, bins=50, **kw) - ax[1].set_xlabel('Time Spacing (s)') - ax[1].set_ylabel('Frequency') - ax[1].set_title('Histogram of Time Spacing') - - annotation_text = ( - f'Median: {median_time_diff:.2f} s\n' - f'Mean: {mean_time_diff:.2f} s\n' - f'Max: {max_time_diff:.2f} s ({max_time_diff_hrs:.2f} hr)\n' - f'Min: {min_time_diff:.2f} s' - ) - ax[1].annotate(annotation_text, xy=(0.96, 0.96), xycoords='axes fraction', - fontsize=def_font_size, ha='right', va='top', - bbox=dict(boxstyle='round,pad=0.3', edgecolor='black', facecolor='white', alpha=.5)) - - # Set font sizes for all annotations - # Font size 14 looks roughly like fontsize 8 when I drop this figure in Word - a bit small - # Font size 14 looks like fontsize 13 when I drop the top *half* of this figure in powerpoint - acceptable - for axes in ax: - axes.xaxis.label.set_size(def_font_size) - axes.yaxis.label.set_size(def_font_size) - 
axes.tick_params(axis='both', which='major', labelsize=def_font_size) - # More subtle grid lines - axes.grid(True, which='both', linestyle='--', linewidth=0.5, color='grey') - - return fig, ax - -def plot_ts(ds: xr.Dataset, ax: plt.Axes = None, **kw: dict) -> tuple({plt.Figure, plt.Axes}): - """ - This function plots histograms of temperature and salinity values (middle 95%), and a 2D histogram of salinity and temperature with density contours. - - Parameters - ---------- - ds: xarray in OG1 format with at least TEMP and PSAL. - ax: Optional; axis to plot the data. - kw: Optional; additional keyword arguments for the histograms. - - Returns - ------- - Three plots: histogram of temperature, histogram of salinity, and 2D histogram of salinity and temperature with density contours. - fig: matplotlib.figure.Figure - ax: matplotlib.axes._subplots.AxesSubplot - - Original author - ---------------- - Eleanor Frajka-Williams - """ - _check_necessary_variables(ds, ['DEPTH', 'LONGITUDE', 'LATITUDE', 'PSAL', 'TEMP']) - - if ax is None: - fig, ax = plt.subplots(1, 3, figsize=(14, 4.5)) - else: - fig = plt.gcf() - # Set font sizes for all annotations - def_font_size = 14 - num_bins = 30 - - temp_orig = ds.TEMP.values - sal_orig = ds.PSAL.values - - # Reduce both to where both are finite - temp = temp_orig[np.isfinite(temp_orig)&np.isfinite(sal_orig)] - sal = sal_orig[np.isfinite(sal_orig)&np.isfinite(temp_orig)] - depth = ds.DEPTH[np.isfinite(sal_orig)&np.isfinite(temp_orig)] - long = ds.LONGITUDE[np.isfinite(sal_orig)&np.isfinite(temp_orig)] - lat = ds.LATITUDE[np.isfinite(sal_orig)&np.isfinite(temp_orig)] - - SA = gsw.SA_from_SP(sal, depth, long, lat) - CT = gsw.CT_from_t(SA, temp, depth) - - # Reduce to middle 99% of values - # This helps a lot for plotting, but is also hiding some of the data (not great for a test) - CT_filtered = CT[(CT >= np.nanpercentile(CT, .5)) & (CT <= np.nanpercentile(CT, 99.5))] - SA_filtered = SA[(SA >= np.nanpercentile(SA, .5)) & (SA <= np.nanpercentile(SA, 99.5))] - print('Temperature and Salinity values have been filtered to the middle 99% of values.') - - # Calculate density to add contours - xi = np.linspace(SA_filtered.values.min()-.2, SA_filtered.values.max()+.2, 100) - yi = np.linspace(CT_filtered.values.min()-.2, CT_filtered.values.max()+.2, 100) - xi, yi = np.meshgrid(xi, yi) - zi = gsw.sigma0(xi, yi) - - # Temperature histogram - ax[0].hist(CT_filtered, bins=num_bins, **kw) - ax[0].set_xlabel('Conservative Temperature (°C)') - ax[0].set_ylabel('Frequency') - ax[0].set_title('Histogram of Temperature') - ax[0].set_xlim(CT_filtered.min(), CT_filtered.max()) - - # Salinity histogram - ax[1].hist(SA_filtered, bins=num_bins, **kw) - ax[1].set_xlabel('Absolute Salinity ( )') - ax[1].set_ylabel('Frequency') - ax[1].set_title('Histogram of Salinity') - ax[1].set_xlim(SA_filtered.min(), SA_filtered.max()) - - # 2-d T-S histogram - h = ax[2].hist2d(SA_filtered, CT_filtered, bins=num_bins, cmap='viridis', norm=mcolors.LogNorm(), **kw) - ax[2].contour(xi, yi, zi, colors='black', alpha=0.5, linewidths=0.5) - ax[2].clabel(ax[2].contour(xi, yi, zi, colors='black', alpha=0.5, linewidths=0.5), inline=True, fontsize=def_font_size-2) - cbar = plt.colorbar(h[3], ax=ax[2]) - cbar.set_label('Log Counts') - ax[2].set_xlabel('Absolute Salinity ( )') - ax[2].set_ylabel('Conservative Temperature (°C)') - ax[2].set_title('2D Histogram (Log Scale)') - # Set x-limits based on salinity plot and y-limits based on temperature plot - ax[2].set_xlim(ax[1].get_xlim()) - 
ax[2].set_ylim(ax[0].get_xlim()) - - # Set font sizes for all annotations - for axes in ax: - axes.xaxis.label.set_size(def_font_size) - axes.yaxis.label.set_size(def_font_size) - axes.tick_params(axis='both', which='major', labelsize=def_font_size) - axes.grid(True, which='both', linestyle='--', linewidth=0.5, color='grey') - - - # Adjust the width of ax[1] to match the size of the frame of ax[2] - box1 = ax[1].get_position() - box2 = ax[2].get_position() - dw = box1.width-box2.width - ax[1].set_position([box1.x0+dw, box1.y0, box2.width, box1.height]) - # Adjust the height of ax[2] to match the width of ax[0] - box0 = ax[0].get_position() - dh = box0.height - box2.height - ax[2].set_position([box2.x0, box2.y0 - dh, box2.width, box0.width]) - # Adjust the height of ax[2] to match the width of ax[0] - box0 = ax[0].get_position() - box2 = ax[2].get_position() - fig_width, fig_height = fig.get_size_inches() - new_height = box0.width * fig_width / fig_height - ax[2].set_position([box2.x0, box2.y0, box2.width, new_height]) - - return fig, ax - - def calc_DEPTH_Z(ds): """ Calculate the depth (Z position) of the glider using the gsw library to convert pressure to depth. @@ -1070,7 +176,7 @@ def calc_DEPTH_Z(ds): ---------------- Eleanor Frajka-Williams """ - _check_necessary_variables(ds, ['PRES', 'LONGITUDE', 'LATITUDE']) + utilities._check_necessary_variables(ds, ['PRES', 'LONGITUDE', 'LATITUDE']) # Initialize the new variable with the same dimensions as dive_num ds['DEPTH_Z'] = (['N_MEASUREMENTS'], np.full(ds.dims['N_MEASUREMENTS'], np.nan)) @@ -1108,7 +214,7 @@ def calc_w_meas(ds): ---------------- Eleanor Frajka-Williams """ - _check_necessary_variables(ds, ['TIME']) + utilities._check_necessary_variables(ds, ['TIME']) # Ensure inputs are numpy arrays time = ds.TIME.values if 'DEPTH_Z' not in ds.variables and all(var in ds.variables for var in ['PRES', 'LATITUDE', 'LONGITUDE']): @@ -1161,7 +267,7 @@ def calc_w_sw(ds): Eleanor Frajka-Williams """ # Eleanor's note: This could be bundled with calc_glider_w_from_depth, but keeping them separate allows for some extra testing/flexibility for the user. - _check_necessary_variables(ds, ['GLIDER_VERT_VELO_MODEL', 'GLIDER_VERT_VELO_DZDT']) + utilities._check_necessary_variables(ds, ['GLIDER_VERT_VELO_MODEL', 'GLIDER_VERT_VELO_DZDT']) # Calculate the vertical seawater velocity vert_sw_speed = ds['GLIDER_VERT_VELO_DZDT'].values - ds['GLIDER_VERT_VELO_MODEL'].values @@ -1170,160 +276,6 @@ def calc_w_sw(ds): ds = ds.assign(VERT_CURR_MODEL=(('N_MEASUREMENTS'), vert_sw_speed, {'long_name': 'vertical_current_of_seawater_derived_from_glider_flight_model', 'units': 'm s-1'})) return ds - -def plot_vertical_speeds_with_histograms(ds, start_prof=None, end_prof=None): - """ - Plot vertical speeds with histograms for diagnostic purposes. - This function generates a diagnostic plot for the calculation of vertical seawater velocity. - It plots the modelled and computed (from dz/dt) vertical velocities as line plots, where these - should be similar. The difference between these velocities is the implied seawater velocity, - which should be closer to zero than the vehicle velocities. The histogram provides a visual - representation to identify any biases. The final calculation of the median should be close to - zero if a large enough sample of dives is input and if the glider flight model is well-tuned. 
- - Parameters - ---------- - ds (xarray.Dataset): The dataset containing the vertical speed data where - - VERT_GLIDER_SPEED is the modelled glider speed - - VERT_SPEED_DZDT is the computed glider speed from the pressure sensor - - VERT_SW_SPEED is the implied seawater velocity. - start_prof (int, optional): The starting profile number for subsetting the dataset. Defaults to first profile number. - end_prof (int, optional): The ending profile number for subsetting the dataset. Defaults to last profile number. - - Returns - ------- - fig, axs (tuple): The figure and axes objects for the plot. - - Original author - ---------------- - Eleanor Frajka-Williams - """ - _check_necessary_variables(ds, ['GLIDER_VERT_VELO_MODEL', 'GLIDER_VERT_VELO_DZDT', 'VERT_CURR_MODEL','PROFILE_NUMBER']) - - if start_prof is None: - start_prof = int(ds['PROFILE_NUMBER'].values.mean())-10 - - if end_prof is None: - end_prof = int(ds['PROFILE_NUMBER'].values.mean())+10 - - ds = ds.where((ds['PROFILE_NUMBER'] >= start_prof) & (ds['PROFILE_NUMBER'] <= end_prof), drop=True) - vert_curr = ds.VERT_CURR_MODEL.values * 100 # Convert to cm/s - vert_dzdt = ds.GLIDER_VERT_VELO_DZDT.values * 100 # Convert to cm/s - vert_model = ds.GLIDER_VERT_VELO_MODEL.values * 100 # Convert to cm/s - - # Calculate the median line for the lower right histogram - median_vert_sw_speed = np.nanmedian(vert_curr) - - # Create a dictionary to map the variable names to their labels for legends - labels_dict = { - 'vert_dzdt': 'w$_{meas}$ (from dz/dt)', - 'vert_model': 'w$_{model}$ (flight model)', - 'vert_curr': 'w$_{sw}$ (calculated)' - } - - fig, axs = plt.subplots(2, 2, figsize=(14, 12), gridspec_kw={'width_ratios': [3, 1]}) - - # Upper left subplot for vertical velocity and glider speed - ax1 = axs[0, 0] - ax1.axhline(0, color='gray', linestyle='-', linewidth=0.5) # Add zero horizontal line - ax1.plot(ds['TIME'], vert_dzdt, label=labels_dict['vert_dzdt']) - ax1.plot(ds['TIME'], vert_model, color='r', label=labels_dict['vert_model']) - ax1.plot(ds['TIME'], vert_curr, color='g', label=labels_dict['vert_curr']) - # Annotations - ax1.set_xlabel('Time') - ax1.set_ylabel('Vertical Velocity (cm/s)') - ax1.legend(loc='lower left') - ax1.xaxis.set_major_formatter(DateFormatter('%d-%b')) - ax1.legend(loc='lower right') - - # Upper right subplot for histogram of vertical velocity - ax1_hist = axs[0, 1] - ax1_hist.hist(vert_dzdt, bins=50, orientation='horizontal', alpha=0.5, color='blue', label=labels_dict['vert_dzdt']) - ax1_hist.hist(vert_model, bins=50, orientation='horizontal', alpha=0.5, color='red', label=labels_dict['vert_model']) - ax1_hist.hist(vert_curr, bins=50, orientation='horizontal', alpha=0.5, color='green', label=labels_dict['vert_curr']) - ax1_hist.set_xlabel('Frequency') - - # Determine the best location for the legend based on the y-axis limits and zero - y_upper_limit = ax1_hist.get_ylim()[1] - y_lower_limit = ax1_hist.get_ylim()[0] - if abs(y_upper_limit) > abs(y_lower_limit): - legend_loc = 'upper right' - else: - legend_loc = 'lower right' - ax1_hist.legend(loc=legend_loc) - - # Lower left subplot for vertical water speed - ax2 = axs[1, 0] - ax2.axhline(0, color='gray', linestyle='-', linewidth=0.5) # Add zero horizontal line - ax2.plot(ds['TIME'], vert_curr, 'g', label=labels_dict['vert_curr']) - # Annotations - ax2.set_xlabel('Time') - ax2.set_ylabel('Vertical Water Speed (cm/s)') - ax2.legend(loc='upper left') - ax2.xaxis.set_major_formatter(DateFormatter('%d-%b')) - - # Lower right subplot for histogram of vertical water speed 
- ax2_hist = axs[1, 1] - ax2_hist.hist(vert_curr, bins=50, orientation='horizontal', alpha=0.5, color='green', label=labels_dict['vert_curr']) - ax2_hist.axhline(median_vert_sw_speed, color='red', linestyle='dashed', linewidth=1, label=f'Median: {median_vert_sw_speed:.2f} cm/s') - ax2_hist.set_xlabel('Frequency') - - # Determine the best location for the legend based on the y-axis limits and median - y_upper_limit = ax2_hist.get_ylim()[1] - y_lower_limit = ax2_hist.get_ylim()[0] - if abs(y_upper_limit - median_vert_sw_speed) > abs(y_lower_limit - median_vert_sw_speed): - legend_loc = 'upper right' - else: - legend_loc = 'lower right' - ax2_hist.legend(loc=legend_loc) - - # Set font sizes for all annotations - # Font size 14 looks roughly like fontsize 8 when I drop this figure in Word - a bit small - # Font size 14 looks like fontsize 13 when I drop the top *half* of this figure in powerpoint - acceptable - def_font_size = 14 - for ax in [ax1, ax2, ax1_hist, ax2_hist]: - ax.yaxis.label.set_size(def_font_size) - ax.legend(fontsize=def_font_size) - ax.tick_params(axis='both', which='major', labelsize=def_font_size) - ax.xaxis.label.set_size(def_font_size) - - # Adjust the axes so that the distance between y-ticks on the top and lower panel is the same - # Get the y-axis range of the top left plot - y1_range = ax1.get_ylim()[1] - ax1.get_ylim()[0] - # Get the y-axis limits of the lower left plot - y2_range = ax2.get_ylim()[1] - ax2.get_ylim()[0] - # Get the height in inches of the top left plot - box1 = ax1.get_position() - height1 = box1.height - # Get the height in inches of the lower left plot - box2 = ax2.get_position() - height2 = box2.height - # Set a scaled height for the lower left plot - new_height = height1 * y2_range / y1_range - # Determine the change in height - height_change = height2 - new_height - # Shift the y-position of the lower left plot by the change in height - ax2.set_position([box2.x0, box2.y0 + height_change, box2.width, new_height]) - - # Get the position of the lower right plot - box2_hist = ax2_hist.get_position() - # Adjust the position of the lower right plot to match the height of the lower left plot - ax2_hist.set_position([box2_hist.x0, box2_hist.y0 + height_change, box2_hist.width, new_height]) - - # Find the distance between the right edge of the top left plot and the left edge of the top right plot - box1_hist = ax1_hist.get_position() - distance = box1_hist.x0 - (box1.x0 + box1.width) - shift_dist = distance/3 # Not sure this will always work; it may depend on the def_fault_size - # Adjust the width of the top right plot to extend left by half the distance - ax1_hist.set_position([box1_hist.x0 - shift_dist, box1_hist.y0, box1_hist.width + shift_dist, box1_hist.height]) - # Adjust the width of the bottom right plot to extend left by half the distance - box2_hist = ax2_hist.get_position() - ax2_hist.set_position([box2_hist.x0 - shift_dist, box2_hist.y0, box2_hist.width + shift_dist, box2_hist.height]) - - plt.show() - - return fig, axs - def quant_binavg(ds, var='VERT_CURR', zgrid=None, dz=None): """ Calculate the bin average of vertical velocities within specified depth ranges. 
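To accompany the quant_binavg docstring above, here is a hedged sketch of the depth-bin averaging it performs: samples are grouped into dz-wide depth bins and averaged per bin. The real function additionally reports confidence limits (the CIlimits output further down); the helper name and arguments below are illustrative only.

import numpy as np

def binavg_sketch(depth, w, dz=10.0):
    """Average w in dz-wide depth bins; returns bin centres and bin means."""
    edges = np.arange(0.0, np.nanmax(depth) + dz, dz)
    idx = np.digitize(depth, edges) - 1      # bin index for each sample
    means = np.full(len(edges) - 1, np.nan)
    for k in range(len(edges) - 1):
        vals = w[idx == k]
        vals = vals[np.isfinite(vals)]       # skip NaNs so empty bins stay NaN
        if vals.size:
            means[k] = vals.mean()           # mean vertical velocity in bin k
    return edges[:-1] + dz / 2, means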
@@ -1350,7 +302,7 @@ def quant_binavg(ds, var='VERT_CURR', zgrid=None, dz=None): ---------------- Eleanor Frajka-Williams """ - _check_necessary_variables(ds, [var, 'PRES']) + utilities._check_necessary_variables(ds, [var, 'PRES']) press = ds.PRES.values ww = ds[var].values @@ -1426,68 +378,4 @@ def findbetw(arr, bounds): "CIlimits": CIlimits } ) - return ds_out - -def plot_combined_velocity_profiles(ds_out_dives: xr.Dataset, ds_out_climbs: xr.Dataset): - """ - Plots combined vertical velocity profiles for dives and climbs. - - Replicates Fig 3 in Frajka-Williams et al. 2011, but using an updated dataset from Jim Bennett (2013), - now in OG1 format as sg014_20040924T182454_delayed.nc. Note that flight model parameters may differ from those in the paper. - - Parameters - ---------- - ds_out_dives (xarray.Dataset): Dataset containing dive profiles with variables 'zgrid', 'meanw', 'w_lower', and 'w_upper'. - ds_out_climbs (xarray.Dataset): Dataset containing climb profiles with variables 'zgrid', 'meanw', 'w_lower', and 'w_upper'. - - The function converts vertical velocities from m/s to cm/s, plots the mean vertical velocities and their ranges for both dives and climbs, and customizes the plot with labels, legends, and axis settings. - - Note - ---- - Assumes that the vertical velocities are in m/s and the depth grid is in meters. - - Original author - ---------------- - Eleanor Frajka-Williams - """ - conv_factor = 100 # Convert m/s to cm/s - depth_negative = ds_out_dives.zgrid.values * -1 - meanw_dives = ds_out_dives.meanw.values * conv_factor - zgrid_dives = depth_negative - w_lower_dives = ds_out_dives.w_lower.values * conv_factor - w_upper_dives = ds_out_dives.w_upper.values * conv_factor - - meanw_climbs = ds_out_climbs.meanw.values * conv_factor - zgrid_climbs = ds_out_climbs.zgrid.values * -1 - w_lower_climbs = ds_out_climbs.w_lower.values * conv_factor - w_upper_climbs = ds_out_climbs.w_upper.values * conv_factor - - fig, ax = plt.subplots(1, 1, figsize=(4.8, 4.8)) - - ax.xaxis.label.set_size(14) - ax.yaxis.label.set_size(14) - ax.tick_params(axis='both', which='major', labelsize=14) - - # Plot dives - ax.fill_betweenx(zgrid_dives, w_lower_dives, w_upper_dives, color='black', alpha=0.3) - ax.plot(meanw_dives, zgrid_dives, color='black', label='w$_{dive}$') - - # Plot climbs - ax.fill_betweenx(zgrid_climbs, w_lower_climbs, w_upper_climbs, color='red', alpha=0.3) - ax.plot(meanw_climbs, zgrid_climbs, color='red', label='w$_{climb}$') - - ax.invert_yaxis() # Invert y-axis to show depth increasing downwards - ax.axvline(x=0, color='darkgray', linestyle='-', linewidth=0.5) # Add vertical line through 0 - ax.set_xlabel('Vertical Velocity w$_{sw}$ (cm s$^{-1}$)') - ax.set_ylabel('Depth (m)') - ax.set_ylim(top=0, bottom=1000) # Set y-limit maximum to zero - ax.legend(fontsize=14) - #ax.set_title('Combined Vertical Velocity Profiles') - - ax.set_xlim(-1, 1.5) - ax.set_xticks([-1, -0.5, 0, 0.5, 1.0, 1.5]) - plt.tight_layout() - ax.spines['right'].set_visible(False) - ax.spines['top'].set_visible(False) - plt.show() - ax.tick_params(axis='both', which='major', labelsize=12) + return ds_out \ No newline at end of file diff --git a/glidertest/utilities.py b/glidertest/utilities.py new file mode 100644 index 0000000..abaf771 --- /dev/null +++ b/glidertest/utilities.py @@ -0,0 +1,265 @@ +import numpy as np +import pandas as pd +import xarray as xr +from pandas import DataFrame +from skyfield import almanac +from skyfield import api +import gsw +import warnings + + +def 
_check_necessary_variables(ds: xr.Dataset, vars: list):
+    """
+    Checks that all the variables in a list are present in a dataset.
+
+    Parameters
+    ----------
+    ds (xarray.Dataset): dataset to check for the required variables
+    vars (list): list of the variable names that must be present in ds
+
+    Raises
+    ----------
+    KeyError: Raised if any of the variables in vars is not present in ds
+
+    Original author
+    ----------------
+    Callum Rollo
+    """
+    missing_vars = set(vars).difference(set(ds.variables))
+    if missing_vars:
+        msg = f"Required variables {list(missing_vars)} do not exist in the supplied dataset."
+        raise KeyError(msg)
+
+
+def _calc_teos10_variables(ds):
+    """
+    Calculates the TEOS-10 variables (currently DENSITY) if they are not already present in the dataset.
+
+    Parameters
+    ----------
+    ds: xarray dataset in OG1 format containing at least DEPTH, LONGITUDE, LATITUDE, TEMP and PSAL
+
+    Returns
+    -------
+    The dataset with a DENSITY variable added where it was missing.
+    """
+    _check_necessary_variables(ds, ['DEPTH', 'LONGITUDE', 'LATITUDE', 'TEMP', 'PSAL'])
+    if 'DENSITY' not in ds.variables:
+        SA = gsw.SA_from_SP(ds.PSAL, ds.DEPTH, ds.LONGITUDE, ds.LATITUDE)
+        CT = gsw.CT_from_t(SA, ds.TEMP, ds.DEPTH)
+        ds['DENSITY'] = ('N_MEASUREMENTS', gsw.rho(SA, CT, ds.DEPTH).values)
+    return ds
+
+
+def construct_2dgrid(x, y, v, xi=1, yi=1):
+    """
+    Function to grid data
+
+    Parameters
+    ----------
+    x: array of values for the x dimension (e.g. profile number)
+    y: array of values for the y dimension (e.g. depth)
+    v: array of values to be gridded (e.g. temperature)
+    xi: horizontal resolution for the gridding
+    yi: vertical resolution for the gridding
+
+    Returns
+    -------
+    grid: v data gridded in x and y space with xi and yi resolution
+    XI: x data gridded in x and y space with xi and yi resolution
+    YI: y data gridded in x and y space with xi and yi resolution
+
+    Original author
+    ----------------
+    Bastien Queste (https://github.com/bastienqueste/gliderad2cp/blob/de0652f70f4768c228f83480fa7d1d71c00f9449/gliderad2cp/process_adcp.py#L140)
+
+    """
+    if np.size(xi) == 1:
+        xi = np.arange(np.nanmin(x), np.nanmax(x) + xi, xi)
+    if np.size(yi) == 1:
+        yi = np.arange(np.nanmin(y), np.nanmax(y) + yi, yi)
+    raw = pd.DataFrame({'x': x, 'y': y, 'v': v}).dropna()
+    grid = np.full([np.size(xi), np.size(yi)], np.nan)
+    raw['xbins'], xbin_iter = pd.cut(raw.x, xi, retbins=True, labels=False)
+    raw['ybins'], ybin_iter = pd.cut(raw.y, yi, retbins=True, labels=False)
+    _tmp = raw.groupby(['xbins', 'ybins'])['v'].agg('median')
+    grid[_tmp.index.get_level_values(0).astype(int), _tmp.index.get_level_values(1).astype(int)] = _tmp.values
+    YI, XI = np.meshgrid(yi, xi)
+    return grid, XI, YI
+
+def compute_cline(var, depth_array):
+    """
+    Find the depth of the maximum vertical difference for a specified variable
+
+    Parameters
+    ----------
+    var: 2D array containing data from a selected variable gridded over time/profile/distance etc. and depth (y-axis)
+    depth_array: 2D array containing pressure/depth data gridded over time/profile/distance etc. and depth (y-axis)
+
+    Returns
+    -------
+    1D array containing the depth of the cline at each timestamp/profile/distance etc.
+
+    Original author
+    ----------------
+    Chiara Monforte
+    """
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore", category=RuntimeWarning)
+        clin = np.where(np.abs(np.diff(np.nanmean(var, axis=0))) == np.nanmax(np.abs(np.diff(np.nanmean(var, axis=0)))))
+    return np.round(depth_array[0, clin[0]], 1)
+
+
+def compute_sunset_sunrise(time, lat, lon):
+    """
+    Calculates the local sunrise/sunset of the glider location from GliderTools.
+
+    The function uses the Skyfield package to calculate the sunrise and sunset
+    times using the date, latitude and longitude. The times are returned
+    rather than day or night indices, as it is more flexible for the quenching
+    correction.
+ + + Parameters + ---------- + time: numpy.ndarray or pandas.Series + The date & time array in a numpy.datetime64 format. + lat: numpy.ndarray or pandas.Series + The latitude of the glider position. + lon: numpy.ndarray or pandas.Series + The longitude of the glider position. + + Returns + ------- + sunrise: numpy.ndarray + An array of the sunrise times. + sunset: numpy.ndarray + An array of the sunset times. + + Original author + ---------------- + Function from GliderTools (https://github.com/GliderToolsCommunity/GliderTools/blob/master/glidertools/optics.py) + + """ + + ts = api.load.timescale() + eph = api.load("de421.bsp") + + df = DataFrame.from_dict(dict([("time", time), ("lat", lat), ("lon", lon)])) + + # set days as index + df = df.set_index(df.time.values.astype("datetime64[D]")) + + # groupby days and find sunrise/sunset for unique days + grp_avg = df.groupby(df.index).mean(numeric_only=False) + date = grp_avg.index + + time_utc = ts.utc(date.year, date.month, date.day, date.hour) + time_utc_offset = ts.utc( + date.year, date.month, date.day + 1, date.hour + ) # add one day for each unique day to compute sunrise and sunset pairs + + bluffton = [] + for i in range(len(grp_avg.lat)): + bluffton.append(api.wgs84.latlon(grp_avg.lat.iloc[i], grp_avg.lon.iloc[i])) + bluffton = np.array(bluffton) + + sunrise = [] + sunset = [] + for n in range(len(bluffton)): + + f = almanac.sunrise_sunset(eph, bluffton[n]) + t, y = almanac.find_discrete(time_utc[n], time_utc_offset[n], f) + + if not t: + if f(time_utc[n]): # polar day + sunrise.append( + pd.Timestamp( + date[n].year, date[n].month, date[n].day, 0, 1 + ).to_datetime64() + ) + sunset.append( + pd.Timestamp( + date[n].year, date[n].month, date[n].day, 23, 59 + ).to_datetime64() + ) + else: # polar night + sunrise.append( + pd.Timestamp( + date[n].year, date[n].month, date[n].day, 11, 59 + ).to_datetime64() + ) + sunset.append( + pd.Timestamp( + date[n].year, date[n].month, date[n].day, 12, 1 + ).to_datetime64() + ) + + else: + sr = t[y == 1] # y=1 sunrise + sn = t[y == 0] # y=0 sunset + + sunup = pd.to_datetime(sr.utc_iso()).tz_localize(None) + sundown = pd.to_datetime(sn.utc_iso()).tz_localize(None) + + # this doesn't look very efficient at the moment, but I was having issues with getting the datetime64 + # to be compatible with the above code to handle polar day and polar night + + su = pd.Timestamp( + sunup.year[0], + sunup.month[0], + sunup.day[0], + sunup.hour[0], + sunup.minute[0], + ).to_datetime64() + + sd = pd.Timestamp( + sundown.year[0], + sundown.month[0], + sundown.day[0], + sundown.hour[0], + sundown.minute[0], + ).to_datetime64() + + sunrise.append(su) + sunset.append(sd) + + sunrise = np.array(sunrise).squeeze() + sunset = np.array(sunset).squeeze() + + grp_avg["sunrise"] = sunrise + grp_avg["sunset"] = sunset + + # reindex days to original dataframe as night + df_reidx = grp_avg.reindex(df.index) + sunrise, sunset = df_reidx[["sunrise", "sunset"]].values.T + + return sunrise, sunset + +def calc_DEPTH_Z(ds): + """ + Calculate the depth (Z position) of the glider using the gsw library to convert pressure to depth. + + Parameters + ---------- + ds (xarray.Dataset): The input dataset containing 'PRES', 'LATITUDE', and 'LONGITUDE' variables. + + Returns + ------- + xarray.Dataset: The dataset with an additional 'DEPTH_Z' variable. 
+ + Original author + ---------------- + Eleanor Frajka-Williams + """ + _check_necessary_variables(ds, ['PRES', 'LONGITUDE', 'LATITUDE']) + + # Initialize the new variable with the same dimensions as dive_num + ds['DEPTH_Z'] = (['N_MEASUREMENTS'], np.full(ds.dims['N_MEASUREMENTS'], np.nan)) + + # Calculate depth using gsw + depth = gsw.z_from_p(ds['PRES'], ds['LATITUDE']) + ds['DEPTH_Z'] = depth + + # Assign the calculated depth to a new variable in the dataset + ds['DEPTH_Z'].attrs = { + "units": "meters", + "positive": "up", + "standard_name": "depth", + "comment": "Depth calculated from pressure using gsw library, positive up.", + } + + return ds diff --git a/notebooks/demo.ipynb b/notebooks/demo.ipynb index 671ee41..6f0b119 100644 --- a/notebooks/demo.ipynb +++ b/notebooks/demo.ipynb @@ -28,7 +28,7 @@ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "from glidertest import fetchers\n", - "from glidertest import tools" + "from glidertest import tools, utilities, plots" ] }, { @@ -114,7 +114,7 @@ "# Plot about 20 profiles to see the behaviour of the flight model\n", "start_prof = 400\n", "end_prof = 420\n", - "tools.plot_vertical_speeds_with_histograms(ds_sg014, start_prof, end_prof)" + "plots.plot_vertical_speeds_with_histograms(ds_sg014, start_prof, end_prof)" ] }, { @@ -138,7 +138,7 @@ "ds_out_climbs = tools.quant_binavg(ds_climbs, var = 'VERT_CURR_MODEL', dz=10)\n", "\n", "# Plot the profiles (compare to Fig 3 and Fig 4 in Frajka-Williams et al. 2011)\n", - "tools.plot_combined_velocity_profiles(ds_out_dives, ds_out_climbs)" + "plots.plot_combined_velocity_profiles(ds_out_dives, ds_out_climbs)" ] }, { @@ -157,7 +157,7 @@ "outputs": [], "source": [ "# Basic plot of the location of the dataset in space/time\n", - "tools.plot_glider_track(ds)" + "plots.plot_glider_track(ds)" ] }, { @@ -168,7 +168,7 @@ "outputs": [], "source": [ "# Basic diagnostics of the gridding in the dataset\n", - "tools.plot_grid_spacing(ds)" + "plots.plot_grid_spacing(ds)" ] }, { @@ -179,7 +179,7 @@ "outputs": [], "source": [ "# Basic diagnostics of the watermass properties\n", - "tools.plot_ts(ds)" + "plots.plot_ts(ds)" ] }, { @@ -197,7 +197,7 @@ "metadata": {}, "outputs": [], "source": [ - "fig, ax =tools.plot_basic_vars(ds,v_res=1, start_prof=0, end_prof=int(ds.PROFILE_NUMBER.max()))" + "fig, ax = plots.plot_basic_vars(ds,v_res=1, start_prof=0, end_prof=int(ds.PROFILE_NUMBER.max()))" ] }, { @@ -221,7 +221,7 @@ "outputs": [], "source": [ "tools.check_monotony(ds.PROFILE_NUMBER)\n", - "tools.plot_prof_monotony(ds)" + "plots.plot_prof_monotony(ds)" ] }, { @@ -241,10 +241,10 @@ "source": [ "fig, ax = plt.subplots(1, 4, figsize=(15, 5))\n", "\n", - "tools.plot_updown_bias(tools.quant_updown_bias(ds, var='TEMP', v_res=1), ax[0], xlabel='Temperature [C]')\n", - "tools.plot_updown_bias(tools.quant_updown_bias(ds, var='PSAL', v_res=1), ax[1], xlabel='Salinity [PSU]')\n", - "tools.plot_updown_bias(tools.quant_updown_bias(ds, var='DOXY', v_res=1), ax[2], xlabel='Dissolved Oxygen [mmol m-3]')\n", - "tools.plot_updown_bias(tools.quant_updown_bias(ds, var='CHLA', v_res=1), ax[3], xlabel='Chlorophyll [mg m-3]')\n", + "plots.plot_updown_bias(tools.quant_updown_bias(ds, var='TEMP', v_res=1), ax[0], xlabel='Temperature [C]')\n", + "plots.plot_updown_bias(tools.quant_updown_bias(ds, var='PSAL', v_res=1), ax[1], xlabel='Salinity [PSU]')\n", + "plots.plot_updown_bias(tools.quant_updown_bias(ds, var='DOXY', v_res=1), ax[2], xlabel='Dissolved Oxygen [mmol m-3]')\n", + "plots.plot_updown_bias(tools.quant_updown_bias(ds, 
var='CHLA', v_res=1), ax[3], xlabel='Chlorophyll [mg m-3]')\n", "\n", "\n", "ax[0].set_ylabel('Depth [m]')" @@ -283,7 +283,7 @@ "metadata": {}, "outputs": [], "source": [ - "tools.process_optics_assess(ds, var='CHLA')" + "plots.process_optics_assess(ds, var='CHLA')" ] }, { @@ -293,7 +293,7 @@ "metadata": {}, "outputs": [], "source": [ - "tools.check_temporal_drift(ds, var='CHLA')" + "plots.check_temporal_drift(ds, var='CHLA')" ] }, { @@ -306,7 +306,7 @@ "# Let's visually check a section of chlorphyll and see if we observe any NPQ\n", "fig, ax = plt.subplots(1, 1, figsize=(15, 5))\n", "\n", - "tools.plot_quench_assess(ds, 'CHLA', ax, ylim=35)" + "plots.plot_quench_assess(ds, 'CHLA', ax, ylim=35)" ] }, { @@ -331,9 +331,9 @@ "source": [ "fig, ax = plt.subplots(1, 3, figsize=(15, 5))\n", "\n", - "tools.plot_daynight_avg( dayT, nightT, ax[0], xlabel='Temperature [C]')\n", - "tools.plot_daynight_avg( dayS, nightS, ax[1], xlabel='Salinity [PSU]')\n", - "tools.plot_daynight_avg( dayC, nightC, ax[2], xlabel='Chlorophyll [mg m-3]')" + "plots.plot_daynight_avg( dayT, nightT, ax[0], xlabel='Temperature [C]')\n", + "plots.plot_daynight_avg( dayS, nightS, ax[1], xlabel='Salinity [PSU]')\n", + "plots.plot_daynight_avg( dayC, nightC, ax[2], xlabel='Chlorophyll [mg m-3]')" ] }, { @@ -377,7 +377,7 @@ "outputs": [], "source": [ "fig, ax = plt.subplots(1, 1, figsize=(5, 5))\n", - "tools.plot_updown_bias(tools.quant_updown_bias(ds, var='DPAR', v_res=1), ax, xlabel='Irradiance')" + "plots.plot_updown_bias(tools.quant_updown_bias(ds, var='DPAR', v_res=1), ax, xlabel='Irradiance')" ] }, { @@ -418,7 +418,7 @@ "metadata": {}, "outputs": [], "source": [ - "tools.process_optics_assess(ds, var='BBP700')" + "plots.process_optics_assess(ds, var='BBP700')" ] }, { @@ -440,8 +440,16 @@ "metadata": {}, "outputs": [], "source": [ - "tools.check_temporal_drift(ds, var='DOXY')" + "plots.check_temporal_drift(ds, var='DOXY')" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77310fff-49d8-412f-9fbd-617a429e3ea1", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/tests/test_plots.py b/tests/test_plots.py new file mode 100644 index 0000000..0f1cbfd --- /dev/null +++ b/tests/test_plots.py @@ -0,0 +1,97 @@ +import pytest +from glidertest import fetchers, tools, utilities, plots +import matplotlib.pyplot as plt +import math +import numpy as np +import matplotlib +matplotlib.use('agg') # use agg backend to prevent creating plot windows during tests + + +def test_plots(start_prof=0, end_prof=100): + ds = fetchers.load_sample_dataset() + ds = ds.drop_vars(['DENSITY']) + fig, ax = plots.plot_basic_vars(ds,start_prof=start_prof, end_prof=end_prof) + assert ax[0].get_ylabel() == 'Depth (m)' + assert ax[0].get_xlabel() == f'Average Temperature [C] \nbetween profile {start_prof} and {end_prof}' + + +def test_up_down_bias(v_res=1, xlabel='Salinity'): + ds = fetchers.load_sample_dataset() + fig, ax = plt.subplots() + df = tools.quant_updown_bias(ds, var='PSAL', v_res=v_res) + plots.plot_updown_bias(df, ax, xlabel=xlabel) + lims = np.abs(df.dc) + assert ax.get_xlim() == (-np.nanpercentile(lims, 99.5), np.nanpercentile(lims, 99.5)) + assert ax.get_ylim() == (df.depth.max() + 1, -df.depth.max() / 30) + assert ax.get_xlabel() == xlabel + # check without passing axis + new_fig, new_ax = plots.plot_updown_bias(df, xlabel=xlabel) + assert new_ax.get_xlim() == (-np.nanpercentile(lims, 99.5), np.nanpercentile(lims, 99.5)) + assert new_ax.get_ylim() == (df.depth.max() + 1, -df.depth.max() / 30) + 
assert new_ax.get_xlabel() == xlabel + + +def test_chl(var1='CHLA', var2='BBP700'): + ds = fetchers.load_sample_dataset() + ax = plots.process_optics_assess(ds, var=var1) + assert ax.get_ylabel() == var1 + ax = plots.process_optics_assess(ds, var=var2) + assert ax.get_ylabel() == var2 + with pytest.raises(KeyError) as e: + plots.process_optics_assess(ds, var='nonexistent_variable') + + +def test_quench_sequence(xlabel='Temperature [C]',ylim=45): + ds = fetchers.load_sample_dataset() + if not "TIME" in ds.indexes.keys(): + ds = ds.set_xindex('TIME') + fig, ax = plt.subplots() + plots.plot_quench_assess(ds, 'CHLA', ax,ylim=ylim) + assert ax.get_ylabel() == 'Depth [m]' + assert ax.get_ylim() == (ylim, -ylim / 30) + + dayT, nightT = tools.compute_daynight_avg(ds, sel_var='TEMP') + fig, ax = plots.plot_daynight_avg(dayT, nightT,xlabel=xlabel) + assert ax.get_ylabel() == 'Depth [m]' + assert ax.get_xlabel() == xlabel + + +def test_temporal_drift(var='DOXY'): + ds = fetchers.load_sample_dataset() + fig, ax = plt.subplots(1, 2) + plots.check_temporal_drift(ds,var, ax) + assert ax[1].get_ylabel() == 'Depth (m)' + assert ax[0].get_ylabel() == var + assert ax[1].get_xlim() == (np.nanpercentile(ds[var], 0.01), np.nanpercentile(ds[var], 99.99)) + plots.check_temporal_drift(ds,'CHLA') + + +def test_profile_check(): + ds = fetchers.load_sample_dataset() + tools.check_monotony(ds.PROFILE_NUMBER) + plots.plot_prof_monotony(ds) + + +def test_basic_statistics(): + ds = fetchers.load_sample_dataset() + plots.plot_glider_track(ds) + plots.plot_grid_spacing(ds) + plots.plot_ts(ds) + + +def test_vert_vel(): + ds_sg014 = fetchers.load_sample_dataset(dataset_name="sg014_20040924T182454_delayed_subset.nc") + ds_sg014 = tools.calc_w_meas(ds_sg014) + ds_sg014 = tools.calc_w_sw(ds_sg014) + plots.plot_vertical_speeds_with_histograms(ds_sg014) + ds_dives = ds_sg014.sel(N_MEASUREMENTS=ds_sg014.PHASE == 2) + ds_climbs = ds_sg014.sel(N_MEASUREMENTS=ds_sg014.PHASE == 1) + ds_out_dives = tools.quant_binavg(ds_dives, var = 'VERT_CURR_MODEL', dz=10) + ds_out_climbs = tools.quant_binavg(ds_climbs, var = 'VERT_CURR_MODEL', dz=10) + plots.plot_combined_velocity_profiles(ds_out_dives, ds_out_climbs) + # extra tests for ramsey calculations of DEPTH_Z + ds_climbs = ds_climbs.drop_vars(['DEPTH_Z']) + tools.quant_binavg(ds_climbs, var='VERT_CURR_MODEL', dz=10) + ds_climbs = ds_climbs.drop_vars(['LATITUDE']) + with pytest.raises(KeyError) as e: + tools.quant_binavg(ds_climbs, var='VERT_CURR_MODEL', dz=10) diff --git a/tests/test_tools.py b/tests/test_tools.py index 7efdc58..5727b7e 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -7,76 +7,21 @@ matplotlib.use('agg') # use agg backend to prevent creating plot windows during tests -def test_plots(start_prof=0, end_prof=100): +def test_updown_bias(v_res=1, xlabel='Salinity'): ds = fetchers.load_sample_dataset() - ds = ds.drop_vars(['DENSITY']) - fig, ax = tools.plot_basic_vars(ds,start_prof=start_prof, end_prof=end_prof) - assert ax[0].get_ylabel() == 'Depth (m)' - assert ax[0].get_xlabel() == f'Average Temperature [C] \nbetween profile {start_prof} and {end_prof}' - - -def test_up_down_bias(v_res=1, xlabel='Salinity'): - ds = fetchers.load_sample_dataset() - fig, ax = plt.subplots() df = tools.quant_updown_bias(ds, var='PSAL', v_res=v_res) bins = np.unique(np.round(ds.DEPTH,0)) ncell = math.ceil(len(bins)/v_res) assert len(df) == ncell - tools.plot_updown_bias(df, ax, xlabel=xlabel) - lims = np.abs(df.dc) - assert ax.get_xlim() == (-np.nanpercentile(lims, 99.5), 
np.nanpercentile(lims, 99.5)) - assert ax.get_ylim() == (df.depth.max() + 1, -df.depth.max() / 30) - assert ax.get_xlabel() == xlabel - # check without passing axis - new_fig, new_ax = tools.plot_updown_bias(df, xlabel=xlabel) - assert new_ax.get_xlim() == (-np.nanpercentile(lims, 99.5), np.nanpercentile(lims, 99.5)) - assert new_ax.get_ylim() == (df.depth.max() + 1, -df.depth.max() / 30) - assert new_ax.get_xlabel() == xlabel - -def test_chl(var1='CHLA', var2='BBP700'): - ds = fetchers.load_sample_dataset() - ax = tools.process_optics_assess(ds, var=var1) - assert ax.get_ylabel() == var1 - ax = tools.process_optics_assess(ds, var=var2) - assert ax.get_ylabel() == var2 - with pytest.raises(KeyError) as e: - tools.process_optics_assess(ds, var='nonexistent_variable') - - -def test_quench_sequence(xlabel='Temperature [C]',ylim=45): +def test_daynight(xlabel='Temperature [C]',ylim=45): ds = fetchers.load_sample_dataset() if not "TIME" in ds.indexes.keys(): ds = ds.set_xindex('TIME') - fig, ax = plt.subplots() - tools.plot_quench_assess(ds, 'CHLA', ax,ylim=ylim) - assert ax.get_ylabel() == 'Depth [m]' - assert ax.get_ylim() == (ylim, -ylim / 30) - + dayT, nightT = tools.compute_daynight_avg(ds, sel_var='TEMP') assert len(nightT.dat.dropna()) > 0 assert len(dayT.dat.dropna()) > 0 - - fig, ax = tools.plot_daynight_avg(dayT, nightT,xlabel=xlabel) - assert ax.get_ylabel() == 'Depth [m]' - assert ax.get_xlabel() == xlabel - - -def test_temporal_drift(var='DOXY'): - ds = fetchers.load_sample_dataset() - fig, ax = plt.subplots(1, 2) - tools.check_temporal_drift(ds,var, ax) - assert ax[1].get_ylabel() == 'Depth (m)' - assert ax[0].get_ylabel() == var - assert ax[1].get_xlim() == (np.nanpercentile(ds[var], 0.01), np.nanpercentile(ds[var], 99.99)) - tools.check_temporal_drift(ds,'CHLA') - - -def test_profile_check(): - ds = fetchers.load_sample_dataset() - tools.check_monotony(ds.PROFILE_NUMBER) - tools.plot_prof_monotony(ds) - def test_check_monotony(): ds = fetchers.load_sample_dataset() @@ -86,35 +31,19 @@ def test_check_monotony(): assert not temperature_monotony -def test_basic_statistics(): - ds = fetchers.load_sample_dataset() - tools.plot_glider_track(ds) - tools.plot_grid_spacing(ds) - tools.plot_ts(ds) - - def test_vert_vel(): ds_sg014 = fetchers.load_sample_dataset(dataset_name="sg014_20040924T182454_delayed_subset.nc") ds_sg014 = tools.calc_w_meas(ds_sg014) ds_sg014 = tools.calc_w_sw(ds_sg014) - tools.plot_vertical_speeds_with_histograms(ds_sg014) + ds_dives = ds_sg014.sel(N_MEASUREMENTS=ds_sg014.PHASE == 2) ds_climbs = ds_sg014.sel(N_MEASUREMENTS=ds_sg014.PHASE == 1) ds_out_dives = tools.quant_binavg(ds_dives, var = 'VERT_CURR_MODEL', dz=10) ds_out_climbs = tools.quant_binavg(ds_climbs, var = 'VERT_CURR_MODEL', dz=10) - tools.plot_combined_velocity_profiles(ds_out_dives, ds_out_climbs) + # extra tests for ramsey calculations of DEPTH_Z ds_climbs = ds_climbs.drop_vars(['DEPTH_Z']) tools.quant_binavg(ds_climbs, var='VERT_CURR_MODEL', dz=10) ds_climbs = ds_climbs.drop_vars(['LATITUDE']) with pytest.raises(KeyError) as e: - tools.quant_binavg(ds_climbs, var='VERT_CURR_MODEL', dz=10) - - -def test_depth_z(): - ds = fetchers.load_sample_dataset() - assert 'DEPTH_Z' not in ds.variables - ds = tools.calc_DEPTH_Z(ds) - assert 'DEPTH_Z' in ds.variables - assert ds.DEPTH_Z.min() < -50 - assert ds.DEPTH_Z.max() < 0 + tools.quant_binavg(ds_climbs, var='VERT_CURR_MODEL', dz=10) \ No newline at end of file diff --git a/tests/test_utilities.py b/tests/test_utilities.py new file mode 100644 index 
0000000..d95d675
--- /dev/null
+++ b/tests/test_utilities.py
@@ -0,0 +1,31 @@
+import pytest
+from glidertest import fetchers, utilities
+import matplotlib.pyplot as plt
+import math
+import numpy as np
+import matplotlib
+matplotlib.use('agg')  # use agg backend to prevent creating plot windows during tests
+
+def test_utilitiesmix():
+    ds = fetchers.load_sample_dataset()
+    utilities._check_necessary_variables(ds, ['PROFILE_NUMBER', 'DEPTH', 'TEMP', 'PSAL', 'LATITUDE', 'LONGITUDE'])
+    ds = utilities._calc_teos10_variables(ds)
+    p = 1
+    z = 1
+    tempG, profG, depthG = utilities.construct_2dgrid(ds.PROFILE_NUMBER, ds.DEPTH, ds.TEMP, p, z)
+    denG, profG, depthG = utilities.construct_2dgrid(ds.PROFILE_NUMBER, ds.DEPTH, ds.DENSITY, p, z)
+
+    pycno = utilities.compute_cline(denG, depthG)
+
+
+def test_sunset_sunrise():
+    ds = fetchers.load_sample_dataset()
+    sunrise, sunset = utilities.compute_sunset_sunrise(ds.TIME, ds.LATITUDE, ds.LONGITUDE)
+
+
+def test_depth_z():
+    ds = fetchers.load_sample_dataset()
+    assert 'DEPTH_Z' not in ds.variables
+    ds = utilities.calc_DEPTH_Z(ds)
+    assert 'DEPTH_Z' in ds.variables
+    assert ds.DEPTH_Z.min() < -50

From 2ece48d17819897f152e21c419ef35771539afed Mon Sep 17 00:00:00 2001
From: ChiaraMonforte
Date: Wed, 13 Nov 2024 13:07:44 +0100
Subject: [PATCH 2/4] computing DEPTH_Z was duplicated in both utilities and
 tools; removed from tools

---
 glidertest/tools.py | 35 -----------------------------------
 1 file changed, 35 deletions(-)

diff --git a/glidertest/tools.py b/glidertest/tools.py
index 6792585..2e13cc0 100644
--- a/glidertest/tools.py
+++ b/glidertest/tools.py
@@ -160,41 +160,6 @@ def check_monotony(da):
         print(f'{da.name} is always monotonically increasing')
     return True

-def calc_DEPTH_Z(ds):
-    """
-    Calculate the depth (Z position) of the glider using the gsw library to convert pressure to depth.
-
-    Parameters
-    ----------
-    ds (xarray.Dataset): The input dataset containing 'PRES', 'LATITUDE', and 'LONGITUDE' variables.
-
-    Returns
-    -------
-    xarray.Dataset: The dataset with an additional 'DEPTH_Z' variable.
-
-    Original author
-    ----------------
-    Eleanor Frajka-Williams
-    """
-    utilities._check_necessary_variables(ds, ['PRES', 'LONGITUDE', 'LATITUDE'])
-
-    # Initialize the new variable with the same dimensions as dive_num
-    ds['DEPTH_Z'] = (['N_MEASUREMENTS'], np.full(ds.dims['N_MEASUREMENTS'], np.nan))
-
-    # Calculate depth using gsw
-    depth = gsw.z_from_p(ds['PRES'], ds['LATITUDE'])
-    ds['DEPTH_Z'] = depth
-
-    # Assign the calculated depth to a new variable in the dataset
-    ds['DEPTH_Z'].attrs = {
-        "units": "meters",
-        "positive": "up",
-        "standard_name": "depth",
-        "comment": "Depth calculated from pressure using gsw library, positive up.",
-    }
-
-    return ds
-
 def calc_w_meas(ds):
     """
     Calculate the vertical velocity of a glider using changes in pressure with time.
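With the duplicate removed by PATCH 2/4, DEPTH_Z is computed in one place only. A minimal usage sketch, assuming the sample OG1 dataset used throughout the tests is available via fetchers:

from glidertest import fetchers, utilities

ds = fetchers.load_sample_dataset()   # sample OG1 dataset used in the tests
ds = utilities.calc_DEPTH_Z(ds)       # adds DEPTH_Z, positive up, in metres
print(float(ds.DEPTH_Z.min()))        # the test suite expects this below -50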
From e765549d406470e14ab43a8b89bfa43318aff792 Mon Sep 17 00:00:00 2001
From: ChiaraMonforte
Date: Wed, 13 Nov 2024 13:29:34 +0100
Subject: [PATCH 3/4] added a test that computes DEPTH_Z via calc_w_meas and
 corrected the calc_w_meas function

---
 glidertest/tools.py | 2 +-
 tests/test_tools.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/glidertest/tools.py b/glidertest/tools.py
index 2e13cc0..9945a0a 100644
--- a/glidertest/tools.py
+++ b/glidertest/tools.py
@@ -183,7 +183,7 @@ def calc_w_meas(ds):
     # Ensure inputs are numpy arrays
     time = ds.TIME.values
     if 'DEPTH_Z' not in ds.variables and all(var in ds.variables for var in ['PRES', 'LATITUDE', 'LONGITUDE']):
-        ds = calc_DEPTH_Z(ds)
+        ds = utilities.calc_DEPTH_Z(ds)
     depth = ds.DEPTH_Z.values

     # Calculate the centered differences in pressure and time, i.e. instead of using neighboring points,
diff --git a/tests/test_tools.py b/tests/test_tools.py
index 5727b7e..c7e6ae1 100644
--- a/tests/test_tools.py
+++ b/tests/test_tools.py
@@ -33,6 +33,7 @@ def test_check_monotony():

 def test_vert_vel():
     ds_sg014 = fetchers.load_sample_dataset(dataset_name="sg014_20040924T182454_delayed_subset.nc")
+    ds_sg014.drop_vars("DEPTH_Z")
     ds_sg014 = tools.calc_w_meas(ds_sg014)
     ds_sg014 = tools.calc_w_sw(ds_sg014)

From 48c3013541d2af299e699a26ee55a81d890144fe Mon Sep 17 00:00:00 2001
From: ChiaraMonforte
Date: Wed, 13 Nov 2024 13:43:34 +0100
Subject: [PATCH 4/4] corrected the test so the DEPTH_Z computation actually
 runs and changed the depth_z function so it raises no warning

---
 tests/test_tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_tools.py b/tests/test_tools.py
index c7e6ae1..bc541a9 100644
--- a/tests/test_tools.py
+++ b/tests/test_tools.py
@@ -33,7 +33,7 @@ def test_vert_vel():
     ds_sg014 = fetchers.load_sample_dataset(dataset_name="sg014_20040924T182454_delayed_subset.nc")
-    ds_sg014.drop_vars("DEPTH_Z")
+    ds_sg014 = ds_sg014.drop_vars("DEPTH_Z")
     ds_sg014 = tools.calc_w_meas(ds_sg014)
     ds_sg014 = tools.calc_w_sw(ds_sg014)
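The one-line fix in PATCH 4/4 matters because xarray Datasets are not mutated in place by drop_vars; the call returns a new Dataset that must be reassigned. A self-contained sketch of the failure mode, using a toy dataset rather than the glider data:

import numpy as np
import xarray as xr

ds = xr.Dataset({'DEPTH_Z': ('N_MEASUREMENTS', np.zeros(3))})
ds.drop_vars('DEPTH_Z')          # no effect: the return value is discarded
assert 'DEPTH_Z' in ds.variables
ds = ds.drop_vars('DEPTH_Z')     # the corrected pattern from this patch
assert 'DEPTH_Z' not in ds.variables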