From a34c283bfd81a60770bb8272b3367c0d6b63de56 Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Wed, 3 Jan 2024 15:52:44 -0500 Subject: [PATCH 01/30] added file analysis_db/B05_define_angle.py for step 6 --- .../analysis_db/B05_define_angle.py | 520 ++++++++++++++++++ 1 file changed, 520 insertions(+) create mode 100644 src/icesat2_tracks/analysis_db/B05_define_angle.py diff --git a/src/icesat2_tracks/analysis_db/B05_define_angle.py b/src/icesat2_tracks/analysis_db/B05_define_angle.py new file mode 100644 index 00000000..6a71c395 --- /dev/null +++ b/src/icesat2_tracks/analysis_db/B05_define_angle.py @@ -0,0 +1,520 @@ +import os, sys + + +""" +This file open a ICEsat2 track applied filters and corections and returns smoothed photon heights on a regular grid in an .nc file. +This is python 3 +""" + +from icesat2_tracks.config.IceSAT2_startup import ( + mconfig, + xr, + color_schemes, + plt, + font_for_print, + np, +) + +import icesat2_tracks.ICEsat2_SI_tools.io as io +import icesat2_tracks.ICEsat2_SI_tools.spectral_estimates as spec + +from numba import jit + +import time +import icesat2_tracks.ICEsat2_SI_tools.lanczos as lanczos +import icesat2_tracks.local_modules.m_tools_ph3 as MT +import icesat2_tracks.local_modules.m_general_ph3 as M + +from matplotlib.gridspec import GridSpec + +color_schemes.colormaps2(21) + +col_dict = color_schemes.rels + +track_name, batch_key, test_flag = io.init_from_input(sys.argv) +hemis, batch = batch_key.split("_") + +ATlevel = "ATL03" +plot_path = ( + mconfig["paths"]["plot"] + + "/" + + hemis + + "/" + + batch_key + + "/" + + track_name + + "/B05_angle/" +) +MT.mkdirs_r(plot_path) + +all_beams = mconfig["beams"]["all_beams"] +high_beams = mconfig["beams"]["high_beams"] +low_beams = mconfig["beams"]["low_beams"] +beam_groups = mconfig["beams"]["groups"] +group_names = mconfig["beams"]["group_names"] + +load_path = mconfig["paths"]["work"] + batch_key + "/B02_spectra/" +Gk = xr.load_dataset(load_path + "/B02_" + track_name + "_gFT_k.nc") # + +load_path = mconfig["paths"]["work"] + batch_key + "/B04_angle/" +Marginals = xr.load_dataset(load_path + "/B04_" + track_name + "_marginals.nc") # + +load_path = mconfig["paths"]["work"] + batch_key + "/A02_prior/" +Prior = MT.load_pandas_table_dict("/A02_" + track_name, load_path)["priors_hindcast"] + +save_path = mconfig["paths"]["work"] + batch_key + "/B04_angle/" + + +def derive_weights(weights): + weights = (weights - weights.mean()) / weights.std() + weights = weights - weights.min() + return weights + + +def weighted_means(data, weights, x_angle, color="k"): + """ + weights should have nans when there is no data + data should have zeros where there is no data + """ + from scipy.ndimage.measurements import label + + # make wavenumber groups + groups, Ngroups = label(weights.where(~np.isnan(weights), 0)) + + for ng in np.arange(1, Ngroups + 1): + wi = weights[groups == ng] + weight_norm = weights.sum("k") + k = wi.k.data + data_k = data.sel(k=k).squeeze() + data_weight = data_k * wi + plt.stairs(data_weight.sum("k") / weight_norm, x_angle, linewidth=1, color="k") + if data_k.k.size > 1: + for k in data_k.k.data: + plt.stairs( + data_weight.sel(k=k) / weight_norm, x_angle, color="gray", alpha=0.5 + ) + + data_weighted_mean = ( + data.where((~np.isnan(data)) & (data != 0), np.nan) * weights + ).sum("k") / weight_norm + return data_weighted_mean + + +# cut out data at the boundary and redistibute variance +angle_mask = Marginals.angle * 0 == 0 +angle_mask[0], angle_mask[-1] = False, False +corrected_marginals = ( + 
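# --- illustration: the weighting used by derive_weights()/weighted_means() above ---
# Minimal numpy sketch with toy numbers (not the script's data): per-wavenumber weights are
# standardised, shifted to start at zero and squared, then used to average the angle PDFs;
# NaN weights mark wavenumbers without data and are skipped via nansum.
import numpy as np

pdf_k_angle = np.random.rand(6, 16)                    # toy marginal PDFs, shape (k, angle)
w = np.array([np.nan, 0.2, 0.9, 1.3, 0.7, np.nan])     # toy per-k weights, NaN = no data

w = (w - np.nanmean(w)) / np.nanstd(w)                 # z-score, as in derive_weights()
w = w - np.nanmin(w)                                   # shift so the smallest weight is zero
w = w**2                                               # the script squares the weights before use

weighted_pdf = np.nansum(pdf_k_angle * w[:, None], axis=0) / np.nansum(w)   # weighted mean over k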
Marginals.marginals.isel(angle=angle_mask) + + Marginals.marginals.isel(angle=~angle_mask).sum("angle") / sum(angle_mask).data +) + +# get groupweights +# ----------------- thius does not work jet.ckeck with data on server how to get number of data points per stancil +# Gx['x'] = Gx.x - Gx.x[0] + +# makde dummy variables +M_final = xr.full_like( + corrected_marginals.isel(k=0, beam_group=0).drop("beam_group").drop("k"), np.nan +) +M_final_smth = xr.full_like( + corrected_marginals.isel(k=0, beam_group=0).drop("beam_group").drop("k"), np.nan +) +if M_final.shape[0] > M_final.shape[1]: + M_final = M_final.T + M_final_smth = M_final_smth.T + corrected_marginals = corrected_marginals.T + +Gweights = corrected_marginals.N_data +Gweights = Gweights / Gweights.max() + +k_mask = corrected_marginals.mean("beam_group").mean("angle") + +xticks_2pi = np.arange(-np.pi, np.pi + np.pi / 4, np.pi / 4) +xtick_labels_2pi = [ + "-$\pi$", + "-$3\pi/4$", + "-$\pi/2$", + "-$\pi/4$", + "0", + "$\pi/4$", + "$\pi/2$", + "$3\pi/4$", + "$\pi$", +] + +xticks_pi = np.arange(-np.pi / 2, np.pi / 2 + np.pi / 4, np.pi / 4) +xtick_labels_pi = [ + "-$\pi/2$", + "-$\pi/4$", + "0", + "$\pi/4$", + "$\pi/2$", +] + + +font_for_print() +x_list = corrected_marginals.x +for xi in range(x_list.size): + F = M.figure_axis_xy(7, 3.5, view_scale=0.8, container=True) + gs = GridSpec(3, 2, wspace=0.1, hspace=0.8) + x_str = str(int(x_list[xi] / 1e3)) + + plt.suptitle( + "Weighted marginal PDFs\nx=" + x_str + "\n" + io.ID_to_str(track_name), + y=1.05, + x=0.125, + horizontalalignment="left", + ) + group_weight = Gweights.isel(x=xi) + + ax_list = dict() + ax_sum = F.fig.add_subplot(gs[1, 1]) + + ax_list["sum"] = ax_sum + + data_collect = dict() + for group, gpos in zip(Marginals.beam_group.data, [gs[0, 0], gs[0, 1], gs[1, 0]]): + ax0 = F.fig.add_subplot(gpos) + ax0.tick_params(labelbottom=False) + ax_list[group] = ax0 + + data = corrected_marginals.isel(x=xi).sel(beam_group=group) + weights = derive_weights(Marginals.weight.isel(x=xi).sel(beam_group=group)) + weights = weights**2 + + # derive angle axis + x_angle = data.angle.data + d_angle = np.diff(x_angle)[0] + x_angle = np.insert(x_angle, x_angle.size, x_angle[-1].data + d_angle) + + if ((~np.isnan(data)).sum().data == 0) | ((~np.isnan(weights)).sum().data == 0): + data_wmean = data.mean("k") + else: + data_wmean = weighted_means(data, weights, x_angle, color=col_dict[group]) + plt.stairs(data_wmean, x_angle, color=col_dict[group], alpha=1) + + plt.title("Marginal PDF " + group, loc="left") + plt.sca(ax_sum) + + data_collect[group] = data_wmean + + data_collect = xr.concat(data_collect.values(), dim="beam_group") + final_data = (group_weight * data_collect).sum("beam_group") / group_weight.sum( + "beam_group" + ).data + + plt.sca(ax_sum) + plt.stairs(final_data, x_angle, color="k", alpha=1, linewidth=0.8) + ax_sum.set_xlabel("Angle (rad)") + plt.title("Weighted mean over group & wavenumber", loc="left") + + # get relevant priors + for axx in ax_list.values(): + axx.set_ylim(0, final_data.max() * 1.5) + axx.set_xticks(xticks_pi) + axx.set_xticklabels(xtick_labels_pi) + + try: + ax_list["group3"].set_ylabel("PDF") + ax_list["group1"].set_ylabel("PDF") + ax_list["group3"].tick_params(labelbottom=True) + ax_list["group3"].set_xlabel("Angle (rad)") + except: + pass + + ax_final = F.fig.add_subplot(gs[-1, :]) + plt.title("Final angle PDF", loc="left") + + priors_k = Marginals.Prior_direction[~np.isnan(k_mask.isel(x=xi))] + for pk in priors_k: + ax_final.axvline(pk, 
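# --- illustration: redistributing the boundary bins of the angle marginals (toy numbers) ---
# Sketch of the 'corrected_marginals' step above: the outermost angle bins are masked out and
# their combined probability mass is spread evenly over the interior bins, so the variance at
# the domain edges is redistributed while the total is preserved.
import numpy as np

pdf = np.array([0.05, 0.10, 0.30, 0.35, 0.10, 0.10])    # toy marginal over angle bins
mask = np.ones(pdf.size, dtype=bool)
mask[0] = mask[-1] = False                              # drop the first and last angle bin

corrected = pdf[mask] + pdf[~mask].sum() / mask.sum()   # interior bins + shared edge mass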
color=color_schemes.cascade2, linewidth=1, alpha=0.7) + + plt.stairs(final_data, x_angle, color="k", alpha=0.5, linewidth=0.8) + + final_data_smth = lanczos.lanczos_filter_1d(x_angle, final_data, 0.1) + + plt.plot(x_angle[0:-1], final_data_smth, color="black", linewidth=0.8) + + ax_final.axvline( + x_angle[0:-1][final_data_smth.argmax()], + color=color_schemes.orange, + linewidth=1.5, + alpha=1, + zorder=1, + ) + ax_final.axvline( + x_angle[0:-1][final_data_smth.argmax()], + color=color_schemes.black, + linewidth=3.2, + alpha=1, + zorder=0, + ) + + plt.xlabel("Angle (rad)") + plt.xlim(-np.pi * 0.8, np.pi * 0.8) + + ax_final.set_xticks(xticks_pi) + ax_final.set_xticklabels(xtick_labels_pi) + + M_final[xi, :] = final_data + M_final_smth[xi, :] = final_data_smth + + F.save_pup(path=plot_path, name="B05_weigthed_margnials_x" + x_str) + + +M_final.name = "weighted_angle_PDF" +M_final_smth.name = "weighted_angle_PDF_smth" +Gpdf = xr.merge([M_final, M_final_smth]) + +if len(Gpdf.x) < 2: + print("not enough x data, exit") + MT.json_save( + "B05_fail", + plot_path + "../", + { + "time": time.asctime(time.localtime(time.time())), + "reason": "not enough x segments", + }, + ) + print("exit()") + exit() + + +class plot_polarspectra(object): + def __init__(self, k, thetas, data, data_type="fraction", lims=None, verbose=False): + """ + data_type either 'fraction' or 'energy', default (fraction) + lims (None) limts of k. if None set by the limits of the vector k + """ + self.k = k + self.data = data + self.thetas = thetas + + self.lims = lims = [self.k.min(), self.k.max()] if lims is None else lims + freq_sel_bool = M.cut_nparray(self.k, lims[0], lims[1]) + + self.min = np.round(np.nanmin(data[freq_sel_bool, :]), 2) + self.max = np.round(np.nanmax(data[freq_sel_bool, :]), 2) + if verbose: + print(str(self.min), str(self.max)) + + self.klabels = np.linspace(self.min, self.max, 5) + + self.data_type = data_type + if data_type == "fraction": + self.clevs = np.linspace( + np.nanpercentile(dir_data.data, 1), np.ceil(self.max * 0.9), 21 + ) + elif data_type == "energy": + self.ctrs_min = self.min + self.min * 0.05 + self.clevs = np.linspace(self.min + self.min * 0.05, self.max * 0.60, 21) + + def linear(self, radial_axis="period", ax=None, cbar_flag=True): + """ """ + if ax is None: + ax = plt.subplot(111, polar=True) + else: + ax = ax + ax.set_theta_direction(-1) + ax.set_theta_zero_location("W") + + grid = ax.grid(color="k", alpha=0.5, linestyle="-", linewidth=0.5) + + if self.data_type == "fraction": + cm = plt.cm.RdYlBu_r + colorax = ax.contourf( + self.thetas, self.k, self.data, self.clevs, cmap=cm, zorder=1 + ) + elif self.data_type == "energy": + cm = plt.cm.Paired + cm.set_under = "w" + cm.set_bad = "w" + colorax = ax.contourf( + self.thetas, self.k, self.data, self.clevs, cmap=cm, zorder=1 + ) # , vmin=self.ctrs_min) + + if cbar_flag: + cbar = plt.colorbar( + colorax, fraction=0.046, pad=0.1, orientation="horizontal" + ) + cbar.ax.get_yaxis().labelpad = 30 + cbar.outline.set_visible(False) + clev_tick_names, clev_ticks = MT.tick_formatter( + FP.clevs, expt_flag=False, shift=0, rounder=4, interval=1 + ) + cbar.set_ticks(clev_ticks[::5]) + cbar.set_ticklabels(clev_tick_names[::5]) + self.cbar = cbar + + if (self.lims[-1] - self.lims[0]) > 500: + radial_ticks = np.arange(100, 1600, 300) + else: + radial_ticks = np.arange(100, 800, 100) + xx_tick_names, xx_ticks = MT.tick_formatter( + radial_ticks, expt_flag=False, shift=1, rounder=0, interval=1 + ) + xx_tick_names = [" " + str(d) + "m" for d in 
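# --- illustration: smoothing the final angle PDF and reading off the peak direction (toy data) ---
# The script calls lanczos.lanczos_filter_1d for this step; the stand-in below is a generic
# windowed-sinc low-pass built from numpy only (an assumed substitute, not that module's
# implementation), followed by the same argmax step used above to define the dominant angle.
import numpy as np

angle = np.linspace(-np.pi, np.pi, 64)                       # toy angle grid (rad)
pdf = np.exp(-((angle - 0.4) ** 2) / 0.1) + 0.05 * np.random.rand(angle.size)

fc, a = 0.1, 10                                              # assumed cutoff (cycles/sample) and kernel half-width
n = np.arange(-a, a + 1)
kernel = 2 * fc * np.sinc(2 * fc * n) * np.sinc(n / a)       # Lanczos-windowed sinc kernel
kernel /= kernel.sum()

pdf_smth = np.convolve(pdf, kernel, mode="same")             # low-pass filtered PDF
peak_angle = angle[pdf_smth.argmax()]                        # dominant wave direction (rad)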
xx_tick_names] + + ax.set_yticks(xx_ticks[::1]) + ax.set_yticklabels(xx_tick_names[::1]) + + degrange = np.arange(0, 360, 30) + degrange = degrange[(degrange <= 80) | (degrange >= 280)] + degrange_label = np.copy(degrange) + degrange_label[degrange_label > 180] = ( + degrange_label[degrange_label > 180] - 360 + ) + + degrange_label = [str(d) + "$^{\circ}$" for d in degrange_label] + + lines, labels = plt.thetagrids(degrange, labels=degrange_label) + + for line in lines: + line.set_linewidth(5) + + ax.set_ylim(self.lims) + ax.spines["polar"].set_color("none") + ax.set_rlabel_position(87) + self.ax = ax + + +font_for_print() +F = M.figure_axis_xy(6, 5.5, view_scale=0.7, container=True) +gs = GridSpec(8, 6, wspace=0.1, hspace=3.1) +color_schemes.colormaps2(21) + +cmap_spec = plt.cm.ocean_r +clev_spec = np.linspace(-8, -1, 21) * 10 + +cmap_angle = color_schemes.cascade_r +clev_angle = np.linspace(0, 4, 21) + + +ax1 = F.fig.add_subplot(gs[0:3, :]) +ax1.tick_params(labelbottom=False) + +weighted_spec = (Gk.gFT_PSD_data * Gk.N_per_stancil).sum("beam") / Gk.N_per_stancil.sum( + "beam" +) +x_spec = weighted_spec.x / 1e3 +k = weighted_spec.k + +xlims = x_spec[0], x_spec[-1] +clev_spec = np.linspace(-80, (10 * np.log(weighted_spec)).max() * 0.9, 21) + +plt.pcolor( + x_spec, + k, + 10 * np.log(weighted_spec), + vmin=clev_spec[0], + vmax=clev_spec[-1], + cmap=cmap_spec, +) + + +plt.title(track_name + "\nPower Spectra (m/m)$^2$ k$^{-1}$", loc="left") + +cbar = plt.colorbar(fraction=0.018, pad=0.01, orientation="vertical", label="Power") +cbar.outline.set_visible(False) +clev_ticks = np.round(clev_spec[::3], 0) +cbar.set_ticks(clev_ticks) +cbar.set_ticklabels(clev_ticks) + +plt.ylabel("wavenumber $k$") + +ax2 = F.fig.add_subplot(gs[3:5, :]) +ax2.tick_params(labelleft=True) + +dir_data = Gpdf.interp(x=weighted_spec.x).weighted_angle_PDF_smth.T + +x = Gpdf.x / 1e3 +angle = Gpdf.angle +plt.pcolor( + x_spec, angle, dir_data, vmin=clev_angle[0], vmax=clev_angle[-1], cmap=cmap_angle +) + +cbar = plt.colorbar(fraction=0.01, pad=0.01, orientation="vertical", label="Density") +plt.title("Direction PDF", loc="left") + +plt.xlabel("x (km)") +plt.ylabel("angle") + +ax2.set_yticks(xticks_pi) +ax2.set_yticklabels(xtick_labels_pi) + + +x_ticks = np.arange(0, xlims[-1].data, 50) +x_tick_labels, x_ticks = MT.tick_formatter( + x_ticks, expt_flag=False, shift=0, rounder=1, interval=2 +) + +ax1.set_xticks(x_ticks) +ax2.set_xticks(x_ticks) +ax1.set_xticklabels(x_tick_labels) +ax2.set_xticklabels(x_tick_labels) +ax1.set_xlim(xlims) +ax2.set_xlim(xlims) + + +xx_list = np.insert(corrected_marginals.x.data, 0, 0) +x_chunks = spec.create_chunk_boundaries( + int(xx_list.size / 3), xx_list.size, iter_flag=False +) +x_chunks = x_chunks[:, ::2] +x_chunks[-1, -1] = xx_list.size - 1 + + +for x_pos, gs in zip(x_chunks.T, [gs[-3:, 0:2], gs[-3:, 2:4], gs[-3:, 4:]]): + x_range = xx_list[[x_pos[0], x_pos[-1]]] + + ax1.axvline(x_range[0] / 1e3, linestyle=":", color="white", alpha=0.5) + ax1.axvline(x_range[-1] / 1e3, color="gray", alpha=0.5) + + ax2.axvline(x_range[0] / 1e3, linestyle=":", color="white", alpha=0.5) + ax2.axvline(x_range[-1] / 1e3, color="gray", alpha=0.5) + + i_spec = weighted_spec.sel(x=slice(x_range[0], x_range[-1])) + i_dir = corrected_marginals.sel(x=slice(x_range[0], x_range[-1])) + + dir_data = (i_dir * i_dir.N_data).sum(["beam_group", "x"]) / i_dir.N_data.sum( + ["beam_group", "x"] + ) + lims = ( + dir_data.k[(dir_data.sum("angle") != 0)][0].data, + dir_data.k[(dir_data.sum("angle") != 0)][-1].data, + ) + + N_angle = 
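# --- illustration: a directional spectrum on a polar axis, as plot_polarspectra.linear() draws above ---
# Minimal matplotlib sketch with toy data: angle on the azimuth, wavelength on the radius,
# contourf for the density; the axis orientation (clockwise, zero at west) mirrors the class.
import numpy as np
import matplotlib.pyplot as plt

thetas = np.linspace(-np.pi / 2, np.pi / 2, 36)              # toy angles (rad)
radius = np.linspace(100, 700, 30)                           # toy wavelengths (m)
TT, RR = np.meshgrid(thetas, radius)
density = np.exp(-((TT - 0.3) ** 2) / 0.2) * np.exp(-((RR - 300.0) ** 2) / 2e4)

ax = plt.subplot(111, polar=True)
ax.set_theta_direction(-1)                                   # clockwise, as in the class
ax.set_theta_zero_location("W")                              # zero angle at west, as in the class
cf = ax.contourf(TT, RR, density, 21, cmap=plt.cm.RdYlBu_r)
plt.colorbar(cf, fraction=0.046, pad=0.1, orientation="horizontal")
plt.show()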
i_dir.angle.size + dir_data2 = dir_data + + plot_data = dir_data2 * i_spec.mean("x") + plot_data = plot_data.rolling(angle=5, k=10).median() + + plot_data = plot_data.sel(k=slice(lims[0], lims[-1])) + xx = 2 * np.pi / plot_data.k + + if np.nanmax(plot_data.data) != np.nanmin(plot_data.data): + ax3 = F.fig.add_subplot(gs, polar=True) + FP = plot_polarspectra( + xx, + plot_data.angle, + plot_data, + lims=None, + verbose=False, + data_type="fraction", + ) + FP.clevs = np.linspace( + np.nanpercentile(plot_data.data, 1), np.round(plot_data.max(), 4), 21 + ) + FP.linear(ax=ax3, cbar_flag=False) + +F.save_pup(path=plot_path + "../", name="B05_dir_ov") + +# save data +Gpdf.to_netcdf(save_path + "/B05_" + track_name + "_angle_pdf.nc") + +MT.json_save( + "B05_success", + plot_path + "../", + {"time": time.asctime(time.localtime(time.time()))}, +) From 57b4b50dcced07ab272676a10979d17207850c0c Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Wed, 3 Jan 2024 17:04:10 -0500 Subject: [PATCH 02/30] add analysis_db/B06_correct_separate_var.py for step 7 --- analysis_db/B06_correct_separate_var.py | 849 ------------------ .../analysis_db/B06_correct_separate_var.py | 775 ++++++++++++++++ 2 files changed, 775 insertions(+), 849 deletions(-) delete mode 100644 analysis_db/B06_correct_separate_var.py create mode 100644 src/icesat2_tracks/analysis_db/B06_correct_separate_var.py diff --git a/analysis_db/B06_correct_separate_var.py b/analysis_db/B06_correct_separate_var.py deleted file mode 100644 index a659e8a2..00000000 --- a/analysis_db/B06_correct_separate_var.py +++ /dev/null @@ -1,849 +0,0 @@ - -import os, sys -#execfile(os.environ['PYTHONSTARTUP']) - -""" -This file open a ICEsat2 track applied filters and corections and returns smoothed photon heights on a regular grid in an .nc file. 
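# --- illustration: the per-chunk directional spectrum handed to plot_polarspectra above ---
# Toy xarray sketch: the chunk-averaged direction PDF D(angle, k) is scaled by the chunk-mean
# 1-D power spectrum S(k) and then median-smoothed over angle and wavenumber, mirroring
# plot_data = (dir_data2 * i_spec.mean("x")).rolling(angle=5, k=10).median() in the script.
import numpy as np
import xarray as xr

k = np.linspace(0.01, 0.1, 40)
angle = np.linspace(-np.pi / 2, np.pi / 2, 32)
D = xr.DataArray(np.random.rand(angle.size, k.size),
                 coords={"angle": angle, "k": k}, dims=("angle", "k"))          # toy direction PDF
S = xr.DataArray(np.exp(-((k - 0.03) ** 2) / 1e-4), coords={"k": k}, dims="k")  # toy 1-D spectrum

E = D * S                                      # directional spectrum E(angle, k)
E_smth = E.rolling(angle=5, k=10).median()     # same smoothing window as the script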
-This is python 3 -""" - -exec(open(os.environ['PYTHONSTARTUP']).read()) -exec(open(STARTUP_2021_IceSAT2).read()) - -#%matplotlib inline - -import ICEsat2_SI_tools.convert_GPS_time as cGPS -import h5py -import ICEsat2_SI_tools.io as io -import ICEsat2_SI_tools.spectral_estimates as spec -import ICEsat2_SI_tools.lanczos as lanczos -import time -import imp -import copy -import spicke_remover -import datetime -import generalized_FT as gFT -from scipy.ndimage.measurements import label - -xr.set_options(display_style='text') -#import s3fs -# %% -ID_name, batch_key, test_flag = io.init_from_input(sys.argv) # loads standard experiment -#ID_name, batch_key, test_flag = '20190605061807_10380310_004_01', 'SH_batch01', False -#ID_name, batch_key, test_flag = '20190601094826_09790312_004_01', 'SH_batch01', False -#ID_name, batch_key, test_flag = '20190207111114_06260210_004_01', 'SH_batch02', False -#ID_name, batch_key, test_flag = '20190208152826_06440210_004_01', 'SH_batch01', False -#ID_name, batch_key, test_flag = '20190213133330_07190212_004_01', 'SH_batch02', False -#ID_name, batch_key, test_flag = '20190207002436_06190212_004_01', 'SH_batch02', False -#ID_name, batch_key, test_flag = '20190206022433_06050212_004_01', 'SH_batch02', False - -#ID_name, batch_key, test_flag = '20190219073735_08070210_004_01', 'SH_batch02', False -#ID_name, batch_key, test_flag = '20190502021224_05160312_004_01', 'SH_batch02', False - -#ID_name, batch_key, test_flag = 'SH_20190208_06440212', 'SH_publish', True -#ID_name, batch_key, test_flag = 'SH_20190219_08070210', 'SH_publish', True -ID_name, batch_key, test_flag = 'SH_20190502_05160312', 'SH_publish', True - -#ID_name, batch_key, test_flag = 'NH_20190311_11200203', 'NH_batch06', True -#ID_name, batch_key, test_flag = 'NH_20210312_11961005', 'NH_batch07', True - - - -#print(ID_name, batch_key, test_flag) -hemis, batch = batch_key.split('_') - -all_beams = mconfig['beams']['all_beams'] -high_beams = mconfig['beams']['high_beams'] -low_beams = mconfig['beams']['low_beams'] - -load_path_work = mconfig['paths']['work'] +'/'+ batch_key +'/' -B2_hdf5 = h5py.File(load_path_work +'B01_regrid'+'/'+ID_name + '_B01_regridded.h5', 'r') -B3_hdf5 = h5py.File(load_path_work +'B01_regrid'+'/'+ID_name + '_B01_binned.h5', 'r') - -B2, B3 = dict(), dict() -for b in all_beams: - B2[b] = io.get_beam_hdf_store(B2_hdf5[b]) - B3[b] = io.get_beam_hdf_store(B3_hdf5[b]) - -B2_hdf5.close(), B2_hdf5.close() - -# B2 = io.load_pandas_table_dict(ID_name + '_B01_regridded' , load_path1) # rhis is the rar photon data -# B3 = io.load_pandas_table_dict(ID_name + '_B01_binned' , load_path1) # - -load_file = load_path_work +'/B02_spectra/' + 'B02_' + ID_name #+ '.nc' -Gk = xr.open_dataset(load_file+'_gFT_k.nc') -Gx = xr.open_dataset(load_file+'_gFT_x.nc') -Gfft = xr.open_dataset(load_file+'_FFT.nc') - - -#plot_path = mconfig['paths']['plot'] + '/'+hemis+'/'+batch_key+'/' + ID_name + '/' -plot_path = mconfig['paths']['plot'] + '/'+hemis+'/'+batch_key+'/' + ID_name + '/B06_correction/' -MT.mkdirs_r(plot_path) - -save_path = mconfig['paths']['work'] +batch_key+'/B06_corrected_separated/' -MT.mkdirs_r(save_path) - - -# %% - -#Gfilt = io.load_pandas_table_dict(ID_name + '_B01_regridded', load_path) # rhis is the rar photon data -#Gd = io.load_pandas_table_dict(ID_name + '_B01_binned' , load_path) # - -col.colormaps2(31, gamma=1) -col_dict= col.rels - - -# %% -def dict_weighted_mean(Gdict, weight_key): - """ - returns the weighted meean of a dict of xarray, data_arrays - weight_key must be in the 
xr.DataArrays - """ - #Gdict = G_rar_fft - #weight_key='N_per_stancil' - - akey = list( Gdict.keys() )[0] - GSUM = Gdict[akey].copy() - GSUM.data = np.zeros(GSUM.shape) - N_per_stancil = GSUM.N_per_stancil * 0 - N_photons = np.zeros(GSUM.N_per_stancil.size) - - counter= 0 - for k,I in Gdict.items(): - #print(k) - I =I.squeeze() - print(len(I.x) ) - if len(I.x) !=0: - GSUM += I.where( ~np.isnan(I), 0) * I[weight_key] #.sel(x=GSUM.x) - N_per_stancil += I[weight_key] - if 'N_photons' in GSUM.coords: - N_photons += I['N_photons'] - counter+=1 - - GSUM = GSUM / N_per_stancil - - if 'N_photons' in GSUM.coords: - GSUM.coords['N_photons'] = (('x', 'beam'), np.expand_dims(N_photons, 1) ) - - GSUM['beam'] = ['weighted_mean'] - GSUM.name='power_spec' - - return GSUM - - -#G_gFT_wmean = (Gk['gFT_PSD_data'].where( ~np.isnan(Gk['gFT_PSD_data']), 0) * Gk['N_per_stancil']).sum('beam')/ Gk['N_per_stancil'].sum('beam') - -G_gFT_wmean = (Gk.where( ~np.isnan(Gk['gFT_PSD_data']), 0) * Gk['N_per_stancil']).sum('beam')/ Gk['N_per_stancil'].sum('beam') -G_gFT_wmean['N_photons'] = Gk['N_photons'].sum('beam') - -G_fft_wmean = (Gfft.where( ~np.isnan(Gfft), 0) * Gfft['N_per_stancil']).sum('beam')/ Gfft['N_per_stancil'].sum('beam') -G_fft_wmean['N_per_stancil'] = Gfft['N_per_stancil'].sum('beam') - - -# %% plot - -# derive spectral errors: -Lpoints= Gk.Lpoints.mean('beam').data -N_per_stancil = Gk.N_per_stancil.mean('beam').data#[0:-2] - -G_error_model =dict() -G_error_data =dict() - -for bb in Gk.beam.data: - I = Gk.sel(beam= bb) - b_bat_error = np.concatenate([ I.model_error_k_cos.data , I.model_error_k_sin.data ]) - Z_error = gFT.complex_represenation(b_bat_error, Gk.k.size, Lpoints) - PSD_error_data, PSD_error_model = gFT.Z_to_power_gFT(Z_error, np.diff(Gk.k)[0],N_per_stancil , Lpoints ) - - #np.expand_dims(PSD_error_model, axis =) - G_error_model[bb] = xr.DataArray(data = PSD_error_model, coords = I.drop('N_per_stancil').coords, name='gFT_PSD_data_error' ).expand_dims('beam') - G_error_data[bb] = xr.DataArray(data = PSD_error_data, coords = I.drop('N_per_stancil').coords, name='gFT_PSD_data_error' ).expand_dims('beam') - -gFT_PSD_data_error_mean = xr.concat(G_error_model.values(), dim='beam') -gFT_PSD_data_error_mean = xr.concat(G_error_data.values(), dim='beam') - -gFT_PSD_data_error_mean = ( gFT_PSD_data_error_mean.where( ~np.isnan(gFT_PSD_data_error_mean), 0) * Gk['N_per_stancil']).sum('beam')/Gk['N_per_stancil'].sum('beam') -gFT_PSD_data_error_mean = ( gFT_PSD_data_error_mean.where( ~np.isnan(gFT_PSD_data_error_mean), 0) * Gk['N_per_stancil']).sum('beam')/Gk['N_per_stancil'].sum('beam') - -G_gFT_wmean['gFT_PSD_data_err'] = gFT_PSD_data_error_mean -G_gFT_wmean['gFT_PSD_data_err'] = gFT_PSD_data_error_mean - -Gk['gFT_PSD_data_err'] = xr.concat(G_error_model.values(), dim='beam') -Gk['gFT_PSD_data_err'] = xr.concat(G_error_data.values(), dim='beam') - - -# %% - -G_gFT_smth = G_gFT_wmean['gFT_PSD_data'].rolling(k=30, center=True, min_periods=1).mean() -G_gFT_smth['N_photons'] = G_gFT_wmean.N_photons -G_gFT_smth["N_per_stancil_fraction"] = Gk['N_per_stancil'].T.mean('beam')/Gk.Lpoints.mean('beam') - -k = G_gFT_smth.k - -# %% -# GG_no_nan = G_gFT_smth.isel( x = ~np.isnan(G_gFT_smth.mean('k')) ) -# k_lead_peak = GG_no_nan.k[GG_no_nan.isel(x=0).argmax().data].data -# if k_lead_peak== k[0].data or k_lead_peak == k[-1].data: -# #raise ValueError('wavenumber Peak on Boundary!') -# print('wavenumber Peak on Boundary!') -# MT.json_save('B06_fail', plot_path+'../', {'time':time.asctime( time.localtime(time.time()) ) , 
'reason': 'wavenumber Peak on Boundary!'}) -# print('exit()') -# #exit() -# -# # %% -# k_lims =0.01 -# k_span = [k_lead_peak- k_lims , k_lead_peak, k_lead_peak+ k_lims] - -F = M.figure_axis_xy() -#plt.loglog(k, k**(-2)) -# plt.loglog(k, 1e-4 *k**(-2)) -# plt.loglog(k, 1e-5 *k**(-3)) - -# F.ax.axvline(k_span[0]) -# F.ax.axvline(k_span[1]) -# F.ax.axvline(k_span[2]) -#plt.plot(np.log(k), np.log( k**(-3) ) ) -#plt.loglog(k, (k)**(-3) - 1e5) - -plt.loglog(k, G_gFT_smth/k) -# dd= dd.where(~np.isinf(dd), np.nan ) -#plt.grid() -plt.title('displacement power Spectra', loc='left') - -# %% -def define_noise_wavenumber_tresh_simple(data_xr, k_peak, k_end_lim =None, plot_flag = False): - - """ - returns noise wavenumber on the high end of a spectral peak. This method fits a straight line in loglog speace using robust regression. - The noise level is defined as the wavenumber at which the residual error of a linear fit to the data is minimal. - - inputs: - data_xr xarray.Dataarray with the power spectra with k as dimension - k_peak wavenumber above which the searh should start - dk the intervall over which the regrssion is repeated - - returns: - k_end the wavenumber at which the spectrum flattens - m slope of the fitted line - b intersect of the fitted line - """ - #data_xr, k_peak = G_gFT_smth.isel(x=0), k_lead_peak - #k_end_lim = None# - #k_end_lim= 0.06396283#0.0224938*1.05 - from scipy.ndimage.measurements import label - - if k_end_lim is None: - k_end_lim =data_xr.k[-1] - - k_lead_peak_margin = k_peak *1.05 - try: - data_log = np.log(data_xr).isel(k =(data_xr.k > k_lead_peak_margin)).rolling(k =10, center=True, min_periods=1).mean() - - except: - data_log = np.log(data_xr).isel(k =(data_xr.k > k_lead_peak_margin/2)).rolling(k =10, center=True, min_periods=1).mean() - - k_log= np.log(data_log.k) - try: - d_grad = data_log.differentiate('k').rolling(k =40, center=True, min_periods=4).mean() - except: - d_grad = data_log.differentiate('k').rolling(k =20, center=True, min_periods=2).mean() - ll = label( d_grad >=-5 ) - - #test if plausible minium exist: - # #print(ll[0][d_grad.k <= k_end_lim] ) - # if sum( ll[0][d_grad.k <= k_end_lim] ==0) == 0: - # #print(sum( ll[0][d_grad.k <= k_end_lim] ==0) == 0) - # print('no gradient in range, set to peak') - # return k_peak - - if ll[0][0] !=0: - #print(sum( ll[0][d_grad.k <= k_end_lim] ==0) == 0) - print('no decay, set to peak') - return k_peak - - if sum(ll[0]) == 0: - k_end = d_grad.k[-1] - else: - k_end = d_grad.k[(ll[0] == 1) ][0].data - - if plot_flag: - # plt.plot(np.log(d_grad.k), d_grad) - # plt.show() - plt.plot(np.log(data_xr.k), np.log(data_xr)) - plt.plot(k_log, data_log ) - plt.plot([np.log(k_end), np.log(k_end)], [-6, -5]) - #print(k_end) - return k_end - - - -# %% new version -def get_correct_breakpoint(pw_results): - br_points = list() - for i in pw_results.keys(): - [br_points.append(i) if 'breakpoint' in i else None] - br_points_df = pw_results[br_points] - br_points_sorted = br_points_df.sort_values() - - alphas_sorted = [i.replace('breakpoint', 'alpha') for i in br_points_df.sort_values().index] - alphas_sorted.append('alpha'+ str(len(alphas_sorted)+1) ) - - - betas_sorted = [i.replace('breakpoint', 'beta') for i in br_points_df.sort_values().index] - - #betas_sorted - alphas_v2 = list() - alpha_i = pw_results['alpha1'] - for i in [0] + list(pw_results[betas_sorted]): - alpha_i += i - alphas_v2.append(alpha_i) - - alphas_v2_sorted = pd.Series(index = alphas_sorted, data =alphas_v2) - br_points_sorted['breakpoint'+ 
str(br_points_sorted.size+1)] = 'end' - - print('all alphas') - print(alphas_v2_sorted) - slope_mask = alphas_v2_sorted < 0 - - if sum(slope_mask) ==0: - print('no negative slope found, set to lowest') - breakpoint = 'start' - else: - - # take steepest slope - alpah_v2_sub = alphas_v2_sorted[slope_mask] - print(alpah_v2_sub) - print(alpah_v2_sub.argmin()) - break_point_name = alpah_v2_sub.index[alpah_v2_sub.argmin()].replace('alpha', 'breakpoint') - - # take first slope - #break_point_name = alphas_v2_sorted[slope_mask].index[0].replace('alpha', 'breakpoint') - breakpoint = br_points_sorted[break_point_name] - - return breakpoint - -def get_breakingpoints(xx, dd): - - import piecewise_regression - x2, y2 = xx, dd - convergence_flag =True - n_breakpoints= 3 - while convergence_flag: - pw_fit = piecewise_regression.Fit(x2, y2, n_breakpoints=n_breakpoints) - print('n_breakpoints', n_breakpoints, pw_fit.get_results()['converged']) - convergence_flag = not pw_fit.get_results()['converged'] - n_breakpoints += 1 - if n_breakpoints >=4: - convergence_flag = False - - pw_results = pw_fit.get_results() - #pw_fit.summary() - - if pw_results['converged']: - # if pw_results['estimates']['alpha1']['estimate'] < 0: - # print('decay at the front') - # print('n_breakpoints',pw_fit.n_breakpoints ) - - pw_results_df = pd.DataFrame(pw_results['estimates']).loc['estimate'] - - breakpoint = get_correct_breakpoint(pw_results_df) - - return pw_fit, breakpoint - - else: - return pw_fit, False - -def define_noise_wavenumber_piecewise(data_xr, plot_flag = False): - - data_log = data_xr - data_log = np.log(data_xr) - - k =data_log.k.data - k_log= np.log(k) - - pw_fit, breakpoint_log = get_breakingpoints(k_log, data_log.data) - - if breakpoint_log is 'start': - print('no decay, set to lowerst wavenumber') - breakpoint_log = k_log[0] - if (breakpoint_log is 'end') | (breakpoint_log is False) : - print('higest wavenumner') - breakpoint_log = k_log[-1] - - breakpoint_pos = abs(k_log -breakpoint_log).argmin() - breakpoint_k = k[breakpoint_pos] - - #plot_flag= False - if plot_flag: - # plt.plot(np.log(d_grad.k), d_grad) - # plt.show() - pw_fit.plot() - #plt.plot(np.log(data_xr.k), np.log(data_xr)) - plt.plot(k_log, data_log ) - #plt.gca().set_xscale('log') - #plt.plot([np.log(breakpoint_k), np.log(breakpoint_k)], [-6, -5]) - #print(k_end) - - return breakpoint_k, pw_fit - -#G_gFT_smth.isel(x=7).plot() - -k_lim_list = list() -k_end_previous = np.nan -x = G_gFT_smth.x.data[0] -k = G_gFT_smth.k.data - -for x in G_gFT_smth.x.data: - #x = G_gFT_smth.isel(x=9).x - #x= 237500.0 - print(x) - # use displacement power spectrum - k_end, pw_fit = define_noise_wavenumber_piecewise(G_gFT_smth.sel(x=x)/k, plot_flag =False ) - #pw_fit.get_results() - #pw_fit.n_breakpoints - - #pw_fit.summary() - #k_end, slope = define_noise_wavenumber_piecewise(G_gFT_smth.sel(x=x), k_lead_peak, k_end_lim= k_end_0, plot_flag =True ) - #k_end = define_noise_wavenumber_tresh_simple(G_gFT_smth.sel(x=x), k_lead_peak, k_end_lim= k_end_0, plot_flag =True ) - - - k_save = k_end_previous if k_end == k[0] else k_end - #k_save = k_end_previous if k_end >= k[-1]*0.95 else k_end - - #k_save = k_end_previous if k_end == k[-1] else k_end - k_end_previous = k_save #if k_end_0 is None else k_end_0 - k_lim_list.append(k_save) - - #k_save = np.nan if slope >= 0 else k_end - # plt.gca().axvline(np.log(k_save), linewidth= 2, color='red') - # plt.show() - print('--------------------------') -# %% -# write k limits to datasets -# lanczos.lanczos_filter_1d(G_gFT_smth.x, 
k_lim_list, 2) -# lanczos.lanczos_filter_1d_wrapping - -font_for_pres() -G_gFT_smth.coords['k_lim'] = ('x', k_lim_list ) -G_gFT_smth.k_lim.plot() -#G_gFT_smth.k_lim.rolling(x=4, center=True, min_periods=1).median().plot() -k_lim_smth = G_gFT_smth.k_lim.rolling(x=3, center=True, min_periods=1).mean() -k_lim_smth.plot(c='r') - -plt.title('k_c filter', loc='left') -F.save_light(path=plot_path, name = str(ID_name)+ '_B06_atten_ov') - -G_gFT_smth['k_lim'] = k_lim_smth #G_gFT_smth.k_lim.rolling(x=3, center=True, min_periods=1).mean().plot(c='r').data -G_gFT_wmean.coords['k_lim'] = k_lim_smth #('x', k_lim_smth ) - - -# %% -font_for_print() - -fn = copy.copy(lstrings) -F = M.figure_axis_xy(fig_sizes['two_column'][0], fig_sizes['two_column'][0]* 0.9, container= True, view_scale =1) - - -plt.suptitle('Cut-off Frequency for Displacement Spectral\n' + io.ID_to_str(ID_name), y = 0.97) -gs = GridSpec(8,3, wspace=0.1, hspace=1.5)#figure=fig,# - -# -# #clev = M.clevels( [Gmean.quantile(0.6).data * 1e4, Gmean.quantile(0.99).data * 1e4], 31)/ 1e4 -# -k_lims = G_gFT_wmean.k_lim -xlims= G_gFT_wmean.k[0], G_gFT_wmean.k[-1] -# -k =high_beams[0] -for pos, k, pflag in zip([gs[0:2, 0],gs[0:2, 1],gs[0:2, 2] ], high_beams, [True, False, False] ): - ax0 = F.fig.add_subplot(pos) - Gplot = Gk.sel(beam = k).isel(x = slice(0, -1)).gFT_PSD_data.squeeze().rolling(k=20, x=2, min_periods= 1, center=True).mean() - #Gplot.plot() - - Gplot= Gplot.where(Gplot["N_per_stancil"] / Gplot["Lpoints"] >= 0.1)#.plot() - #Gplot.plot() - - - alpha_range= iter(np.linspace(1,0, Gplot.x.data.size)) - for x in Gplot.x.data: - ialpha =next(alpha_range) - plt.loglog(Gplot.k, Gplot.sel(x=x)/Gplot.k, linewidth = 0.5, color= col.rels[k], alpha= ialpha) - ax0.axvline(k_lims.sel(x=x), linewidth= 0.4, color= 'black', zorder= 0, alpha=ialpha) - - plt.title(next(fn) + k, color= col_dict[k], loc= 'left') - plt.xlim(xlims) - # - if pflag: - ax0.tick_params(labelbottom=False, bottom=True) - plt.ylabel("Power (m$^2$/k')") - plt.legend() - else: - ax0.tick_params(labelbottom=False, bottom=True, labelleft=False) - -for pos, k, pflag in zip([gs[2:4, 0],gs[2:4, 1],gs[2:4, 2] ], low_beams, [True, False, False] ): - ax0 = F.fig.add_subplot(pos) - Gplot = Gk.sel(beam = k).isel(x = slice(0, -1)).gFT_PSD_data.squeeze().rolling(k=20, x=2, min_periods= 1, center=True).mean() - #Gplot.mean('x').plot() - - Gplot= Gplot.where(Gplot["N_per_stancil"] / Gplot["Lpoints"] >= 0.1)#.plot() - - alpha_range= iter(np.linspace(1,0, Gplot.x.data.size)) - for x in Gplot.x.data: - ialpha =next(alpha_range) - plt.loglog(Gplot.k, Gplot.sel(x=x)/Gplot.k, linewidth = 0.5, color= col.rels[k], alpha= ialpha) - ax0.axvline(k_lims.sel(x=x), linewidth= 0.4, color= 'black', zorder= 0, alpha=ialpha) - - plt.title(next(fn) + k, color= col_dict[k], loc= 'left') - plt.xlim(xlims) - plt.xlabel("observed wavenumber k' ") - - # - if pflag: - ax0.tick_params( bottom=True) - plt.ylabel("Power (m$^2$/k')") - plt.legend() - else: - ax0.tick_params(bottom=True, labelleft=False) - -F.save_light(path=plot_path, name =str(ID_name) + '_B06_atten_ov_simple') -F.save_pup(path=plot_path, name = str(ID_name) + '_B06_atten_ov_simple') - -# %% -pos = gs[5:, 0:2] -ax0 = F.fig.add_subplot(pos) - -lat_str = str(np.round( Gx.isel(x = 0).lat.mean().data, 2) ) +' to ' + str(np.round( Gx.isel(x = -1).lat.mean().data, 2) ) -plt.title(next(fn) + 'Mean Displacement Spectra\n(lat='+ lat_str +')', loc='left') - -dd = (10 * np.log( (G_gFT_smth/G_gFT_smth.k) .isel(x = slice(0, -1))))#.plot() -dd = dd.where(~np.isinf(dd), np.nan) 
- -## filter out segments with less then 10% of data points -dd= dd.where(G_gFT_smth["N_per_stancil_fraction"] >= 0.1)#.plot() - -dd_lims = np.round(dd.quantile(0.01).data*0.95, 0) , np.round(dd.quantile(0.95).data*1.05, 0) -plt.pcolor(dd.x/1e3, dd.k, dd, vmin=dd_lims[0], vmax= dd_lims[-1], cmap = col.white_base_blgror) -cb = plt.colorbar(orientation= 'vertical') - -cb.set_label('Power (m$^2$/k)') -plt.plot( G_gFT_smth.isel(x = slice(0, -1)).x/1e3 , G_gFT_smth.isel(x = slice(0, -1)).k_lim , color= col.black, linewidth = 1) -plt.ylabel('wavenumber k') -plt.xlabel('X (km)') - -pos = gs[6:, -1] -ax9 = F.fig.add_subplot(pos) - -plt.title('Data Coverage (%)', loc ='left') -plt.plot(G_gFT_smth.x/1e3 , G_gFT_smth["N_per_stancil_fraction"]*100 , linewidth = 0.8, color = 'black') -ax9.spines['left'].set_visible(False) -ax9.spines['right'].set_visible(True) -ax9.tick_params(labelright=True, right=True, labelleft=False, left=False) -ax9.axhline(10, linewidth = 0.8, linestyle= '--', color ='black') -#plt.ylabel('(%)') -plt.xlabel('X (km)') - - -F.save_light(path=plot_path, name =str(ID_name) + '_B06_atten_ov') -F.save_pup(path=plot_path, name = str(ID_name) + '_B06_atten_ov') - - -# %% reconstruct slope displacement data -def fit_offset(x, data, model, nan_mask, deg): - - #x, data, model, nan_mask, deg = dist_stencil, height_data, height_model, dist_nanmask, 1 - p_offset = np.polyfit(x[~nan_mask], data[~nan_mask] - model[~nan_mask], deg) - p_offset[-1] = 0 - poly_offset = np.polyval(p_offset,x ) - return poly_offset - -def tanh_fitler(x, x_cutoff , sigma_g= 0.01): - """ - zdgfsg - """ - - decay = 0.5 - np.tanh( (x-x_cutoff)/sigma_g )/2 - return decay - - -#plt.plot(x, tanh_fitler(Gk_1.k, k_thresh, sigma_g= 0.003) ) - - -def reconstruct_displacement(Gx_1, Gk_1, T3, k_thresh): - - """ - reconstructs photon displacement heights for each stancil given the model parameters in Gk_1 - A low-pass frequeny filter can be applied using k-thresh - - inputs: - Gk_1 model data per stencil from _gFT_k file with sin and cos coefficients - Gx_1 real data per stencil from _gFT_x file with mean photon heights and coordindate systems - T3 - k_thresh (None) threshold for low-pass filter - - returns: - height_model reconstucted displements heights of the stancil - poly_offset fitted staight line to the residual between observations and model to account for low-pass variability - nan_mask mask where is observed data in - """ - - dist_stencil = Gx_1.eta + Gx_1.x - dist_stencil_lims = dist_stencil[0].data, dist_stencil[-1].data - - gFT_cos_coeff_sel = np.copy(Gk_1.gFT_cos_coeff) - gFT_sin_coeff_sel = np.copy(Gk_1.gFT_sin_coeff) - - gFT_cos_coeff_sel = gFT_cos_coeff_sel*tanh_fitler(Gk_1.k, k_thresh, sigma_g= 0.003) - gFT_sin_coeff_sel = gFT_sin_coeff_sel*tanh_fitler(Gk_1.k, k_thresh, sigma_g= 0.003) - - # gFT_cos_coeff_sel[Gk_1.k > k_thresh] = 0 - # gFT_sin_coeff_sel[Gk_1.k > k_thresh] = 0 - - - FT_int = gFT.generalized_Fourier(Gx_1.eta + Gx_1.x, None,Gk_1.k ) - _ = FT_int.get_H() - FT_int.p_hat = np.concatenate([ -gFT_sin_coeff_sel /Gk_1.k, gFT_cos_coeff_sel/Gk_1.k ]) - - dx = Gx.eta.diff('eta').mean().data - height_model = FT_int.model() /dx# + T3_sel['heights_c_weighted_mean'].iloc[0] - - dist_nanmask = np.isnan(Gx_1.y_data) - height_data = np.interp(dist_stencil, T3_sel['dist'], T3_sel['heights_c_weighted_mean']) #[~np.isnan(Gx_1.y_data)] - #poly_offset = fit_offset(dist_stencil, height_data, height_model, dist_nanmask, 1) - - return height_model, np.nan, dist_nanmask - -# cutting Table data - - -# %% 
-G_height_model=dict() -k = 'gt2l' -for bb in Gx.beam.data: - G_height_model_temp= dict() - for i in np.arange(Gx.x.size): - #k_thresh= 4 - - Gx_1 = Gx.isel(x= i).sel(beam = bb) - Gk_1 = Gk.isel(x= i).sel(beam = bb) - k_thresh= G_gFT_smth.k_lim.isel(x=0).data - - - dist_stencil = Gx_1.eta + Gx_1.x - dist_stencil_lims = dist_stencil[0].data, dist_stencil[-1].data - dist_stencil_lims_plot = dist_stencil_lims#Gx_1.eta[0]*0.25 + Gx_1.x, Gx_1.eta[-1]*0.25 + Gx_1.x - dist_stencil_lims_plot = Gx_1.eta[0]*1 + Gx_1.x, Gx_1.eta[-1]*1 + Gx_1.x - - T3_sel = B3[k].loc[( (B3[k]['dist'] >= dist_stencil_lims[0]) & (B3[k]['dist'] <= dist_stencil_lims[1]) )] - T2_sel = B2[k].loc[( B2[k]['x_true'] >= T3_sel['x_true'].min() ) & ( B2[k]['x_true'] <= T3_sel['x_true'].max() )] - - if T3_sel.shape[0] != 0: - if T3_sel['x_true'].iloc[-1] < T3_sel['x_true'].iloc[0]: - dist_T2_temp =np.interp(T2_sel['x_true'][::-1], T3_sel['x_true'][::-1], T3_sel['dist'][::-1] ) - T2_sel['dist'] = dist_T2_temp[::-1] - else: - dist_T2_temp =np.interp(T2_sel['x_true'], T3_sel['x_true'], T3_sel['dist'] ) - T2_sel['dist'] = dist_T2_temp - - height_model, poly_offset, dist_nanmask = reconstruct_displacement(Gx_1, Gk_1, T3_sel, k_thresh = k_thresh) - poly_offset = poly_offset*0 - G_height_model_temp[str(i) + bb] = xr.DataArray(height_model, coords=Gx_1.coords, dims= Gx_1.dims, name = 'height_model' ) - else: - G_height_model_temp[str(i) + bb] = xr.DataArray(Gx_1.y_model.data, coords=Gx_1.coords, dims= Gx_1.dims, name = 'height_model' ) - - #G_height_nans[i] = xr.DataArray(dist_nanmask, coords=Gx_1.coords, dims= Gx_1.dims, name = 'nanmask' ) - - # jsut for plotting: - # # corrected rar Photon heights - # T2_sel['heights_c_residual'] = photon_height_residual = T2_sel['heights_c'] - np.interp(T2_sel['dist'], dist_stencil, height_model + poly_offset) - # - # # interpolate rebinned photon heights - # heights_c_weighted_mean_stancil = np.interp(dist_stencil, T3_sel['dist'], T3_sel['heights_c_weighted_mean'] ) - # - # # corrected rebinned photon heights - # photon_height_residual_mean = heights_c_weighted_mean_stancil - (height_model + poly_offset) - # photon_height_residual_mean[dist_nanmask] = np.nan - # T3_sel['heights_c_weighted_mean_residual'] = T3_sel['heights_c_weighted_mean'] - np.interp(T3_sel['dist'], dist_stencil, height_model + poly_offset ) - - #plot - # font_for_pres() - # M.figure_axis_xy(5.5, 6, view_scale = 0.8) - # - # plt.subplot(3,1 ,1) - # plt.scatter(T2_sel['dist'], T2_sel['heights_c'], s= 1, marker='o', color='black', alpha =0.2, edgecolors= 'none' ) - # #plt.scatter(T3_sel['dist'], T3_sel['heights_c_weighted_mean'], s= 1, marker='o', color='black', alpha =0.2, edgecolors= 'none' ) - # plt.plot(T3_sel['dist'], T3_sel['heights_c_weighted_mean'] , color =col.rascade1, linewidth = 0.5, label = 'residual $h_c$') - # plt.xlim(dist_stencil_lims_plot) - # plt.ylim(0, 1.5) - # - # ax1 = plt.subplot(3,1 ,2) - # plt.plot(dist_stencil, height_model + poly_offset ,'-', c='red', linewidth=0.8, alpha=1,zorder= 12, label = 'GFT height model + correction') - # plt.plot(dist_stencil, height_model ,'-', c='orange', linewidth=0.8, alpha=0.5,zorder= 2, label = 'GFT height model') - # plt.legend(loc = 1) - # plt.xlim(dist_stencil_lims_plot) - # ax1.axhline(0, linewidth=0.5, color= 'black') - # - # plt.subplot(3,1 ,3) - # plt.scatter(T2_sel['dist'], T2_sel['heights_c_residual'], s= 1, marker='o', color='black', alpha =0.5, edgecolors= 'none', zorder=6 ) - # #plt.scatter(T2_sel['dist'], T2_sel['heights_c_residual'], s= 1, marker='o', 
color='black', alpha =1, edgecolors= 'none' ) - # - # plt.plot(T3_sel['dist'], T3_sel['heights_c_weighted_mean_residual'],'-', c=col.rascade2, linewidth=0.5, alpha=1, zorder= 10, label = 'GFT height model + correction') - # #plt.plot(dist_stencil, photon_height_residual_mean,'-', c='red', linewidth=0.3, alpha=1, zorder= 2, label = 'GFT height model + correction') - # plt.fill_between(dist_stencil , photon_height_residual_mean, color= col.cascade2, edgecolor = None, alpha = 1, zorder= 0) - # - # plt.xlim(dist_stencil_lims_plot) - # plt.ylim(0, 1.5) - - G_height_model[bb] = xr.concat(G_height_model_temp.values(), dim= 'x').T - -Gx['height_model'] = xr.concat(G_height_model.values(), dim= 'beam').transpose('eta', 'beam', 'x') - -# %% -Gx_v2, B2_v2, B3_v2 = dict(), dict(), dict() -for bb in Gx.beam.data: - print(bb) - Gx_k = Gx.sel(beam = bb) - #Gx_k['height_model'] = xr.concat(G_height_model.values(), dim= 'x').T#.plot() - Gh = Gx['height_model'].sel(beam = bb).T - Gh_err = Gx_k['model_error_x'].T - Gnans = np.isnan(Gx_k.y_model) - - concented_heights = Gh.data.reshape(Gh.data.size) - concented_err = Gh_err.data.reshape(Gh.data.size) - concented_nans = Gnans.data.reshape(Gnans.data.size) - concented_x = (Gh.x+Gh.eta).data.reshape(Gh.data.size) - - dx = Gh.eta.diff('eta')[0].data - continous_x_grid = np.arange(concented_x.min(), concented_x.max(), dx) - continous_height_model = np.interp(continous_x_grid, concented_x, concented_heights ) - concented_err = np.interp(continous_x_grid, concented_x, concented_err ) - continous_nans = np.interp(continous_x_grid, concented_x, concented_nans ) ==1 - - T3 = B3[bb]#.loc[( (B3[k]['dist'] >= dist_stencil_lims[0]) & (B3[k]['dist'] <= dist_stencil_lims[1]) )] - T2 = B2[bb]#.loc[( B2[k]['x_true'] >= T3_sel['x_true'].min() ) & ( B2[k]['x_true'] <= T3_sel['x_true'].max() )] - - T2 = T2.sort_values('x_true') - T3 = T3.sort_values('x_true') - T2['dist'] = np.interp(T2['x_true'], T3['x_true'], T3['dist'] ) - T2 = T2.sort_values('dist') - T3 = T3.sort_values('dist') - - #T2 = T2.sort_index() - #T2['dist'] = np.interp(T2['x_true'], T3['x_true'], T3['dist'] ) - - T3['heights_c_model'] = np.interp(T3['dist'], continous_x_grid, continous_height_model) - T3['heights_c_model_err'] = np.interp(T3['dist'], continous_x_grid, concented_err) - T3['heights_c_residual'] = T3['heights_c_weighted_mean'] - T3['heights_c_model'] - - T2['heights_c_model'] = np.interp(T2['dist'], continous_x_grid, continous_height_model) - T2['heights_c_residual'] = T2['heights_c'] - T2['heights_c_model'] - - - B2_v2[bb] = T2 - B3_v2[bb] = T3 - Gx_v2[bb] = Gx_k - - # font_for_print() - # F = M.figure_axis_xy(6, 2, view_scale= 0.7) - # - # plt.plot(T2['dist'] , T2['heights_c']+2,'ok', markersize=0.8, alpha=0.5, label='org photon height_c') - # plt.plot(T3['dist'] , T3['heights_c_weighted_mean']+2,'.r', markersize=1, alpha=0.5, label='org photon wmean') - # - # plt.plot(T2['dist'] , T2['heights_c_model'], '.', markersize=1, alpha=0.8, label='height model', color=col.orange, zorder= 12) - # F.ax.axhline(2, linewidth = .7, color= 'black') - # F.ax.axhline(0, linewidth = .7, color= 'black') - # F.ax.axhline(-2, linewidth = .7, color= 'black') - # - # plt.plot(T2['dist'] , T2['heights_c_residual']-2,'ob', markersize=0.5, alpha=0.5, label='residual photons') - # plt.plot(T3['dist'], T3['heights_c_residual']-2 , 'r', linewidth= 0.8, zorder=12, label='photon height_c resodual') - # - # xlims = np.nanmean(T2['dist']), np.nanmean(T2['dist'])+7e3 - # plt.xlim(xlims) - # dlim = 
np.nanmax(T3['heights_c_residual'][(T3['dist']> xlims[0]) & (T3['dist'] < xlims[1])]) - # #plt.ylim(-dlim*1.5, dlim*1.5) - # try: - # plt.ylim((-2-1.5*dlim), 2+1.5*dlim) - # except: - # plt.ylim(-5, 5) - # plt.legend( ncol= 4) - #F.save_light(path = plot_path , name = 'B06_'+bb+'__check') - - -# %% correct wave incident direction - -load_path = mconfig['paths']['work'] + '/B04_angle_'+hemis+'/' - -try: - G_angle = xr.open_dataset(load_path+ '/B05_'+ID_name + '_angle_pdf.nc' ) - - font_for_pres() - - Ga_abs = (G_angle.weighted_angle_PDF_smth.isel(angle = G_angle.angle > 0).data + G_angle.weighted_angle_PDF_smth.isel(angle = G_angle.angle < 0).data[:,::-1])/2 - Ga_abs = xr.DataArray(data=Ga_abs, dims = G_angle.dims, coords=G_angle.isel(angle = G_angle.angle > 0).coords) - - Ga_abs_front = Ga_abs.isel(x= slice(0, 3)) - Ga_best = (( Ga_abs_front * Ga_abs_front.N_data ).sum('x')/Ga_abs_front.N_data.sum('x')) - - theta = Ga_best.angle[Ga_best.argmax()].data - theta_flag = True - - font_for_print() - F = M.figure_axis_xy(3, 5, view_scale= 0.7) - - plt.subplot(2, 1, 1) - plt.pcolor(Ga_abs) - plt.xlabel('abs angle') - plt.ylabel('x') - - ax = plt.subplot(2, 1, 2) - Ga_best.plot() - plt.title('angle front ' + str(theta*180/np.pi), loc='left') - ax.axvline(theta, color= 'red') - F.save_light(path = plot_path , name = 'B06_angle_def') -except: - - print('no angle data found, skip angle corretion') - theta= 0 - theta_flag = False - -# %% -lam_p = 2 *np.pi/Gk.k -lam = lam_p * np.cos(theta) - -if theta_flag: - k_corrected = 2 * np.pi/lam - x_corrected = Gk.x * np.cos(theta) -else: - k_corrected = 2 * np.pi/lam *np.nan - x_corrected = Gk.x * np.cos(theta) *np.nan - -# %% spectral save -G5 = G_gFT_wmean.expand_dims(dim = 'beam', axis = 1) -G5.coords['beam'] = ['weighted_mean']#(('beam'), 'weighted_mean') -G5 = G5.assign_coords(N_photons= G5.N_photons) -G5['N_photons'] = G5['N_photons'].expand_dims('beam') -G5['N_per_stancil_fraction'] = G5['N_per_stancil_fraction'].expand_dims('beam') - -Gk_v2 = xr.merge([Gk, G5]) - -Gk_v2 = Gk_v2.assign_coords(x_corrected=("x", x_corrected.data)).assign_coords(k_corrected=("k", k_corrected.data)) - -Gk_v2.attrs['best_guess_incident_angle'] = theta - -# save collected spectral data -Gk_v2.to_netcdf(save_path+'/B06_'+ID_name + '_gFT_k_corrected.nc' ) -Gx -# %% save real space data -Gx.to_netcdf(save_path+'/B06_'+ID_name + '_gFT_x_corrected.nc' ) -try: - io.save_pandas_table(B2_v2, 'B06_' +ID_name + '_B06_corrected_resid' , save_path) # all photos but heights adjusted and with distance coordinate -except: - os.remove(save_path+'B06_' +ID_name + '_B06_corrected_resid.h5') - io.save_pandas_table(B2_v2, 'B06_' +ID_name + '_B06_corrected_resid' , save_path) # all photos but heights adjusted and with distance coordinate - -try: - io.save_pandas_table(B3_v2, 'B06_' +ID_name + '_binned_resid' , save_path) # regridding heights -except: - os.remove(save_path+'B06_' +ID_name + '_binned_resid.h5') - io.save_pandas_table(B3_v2, 'B06_' +ID_name + '_binned_resid' , save_path) # regridding heights - -MT.json_save('B06_success', plot_path + '../', {'time':time.asctime( time.localtime(time.time()) )}) -print('done. 
saved target at ' + plot_path + '../B06_success' ) diff --git a/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py b/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py new file mode 100644 index 00000000..2174b071 --- /dev/null +++ b/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py @@ -0,0 +1,775 @@ +import os, sys + + +""" +This file open a ICEsat2 track applied filters and corections and returns smoothed photon heights on a regular grid in an .nc file. +This is python 3 +""" +from icesat2_tracks.config.IceSAT2_startup import ( + mconfig, + xr, + color_schemes, + font_for_pres, + font_for_print, + plt, + np, + lstrings, + fig_sizes, +) + + +import h5py +import icesat2_tracks.ICEsat2_SI_tools.io as io +import icesat2_tracks.local_modules.m_tools_ph3 as MT +from icesat2_tracks.local_modules import m_general_ph3 as M +import time +import copy +import icesat2_tracks.ICEsat2_SI_tools.generalized_FT as gFT +from scipy.ndimage.measurements import label +import pandas as pd +from matplotlib.gridspec import GridSpec + +xr.set_options(display_style="text") +ID_name, batch_key, test_flag = io.init_from_input(sys.argv) +hemis, batch = batch_key.split("_") + +all_beams = mconfig["beams"]["all_beams"] +high_beams = mconfig["beams"]["high_beams"] +low_beams = mconfig["beams"]["low_beams"] + +load_path_work = mconfig["paths"]["work"] + "/" + batch_key + "/" +B3_hdf5 = h5py.File( + load_path_work + "B01_regrid" + "/" + ID_name + "_B01_binned.h5", "r" +) + + +load_path_angle = mconfig["paths"]["work"] + "/" + batch_key + "/B04_angle/" + +B3 = dict() +for b in all_beams: + B3[b] = io.get_beam_hdf_store(B3_hdf5[b]) + +B3_hdf5.close() + +load_file = load_path_work + "/B02_spectra/" + "B02_" + ID_name # + '.nc' +Gk = xr.open_dataset(load_file + "_gFT_k.nc") +Gx = xr.open_dataset(load_file + "_gFT_x.nc") +Gfft = xr.open_dataset(load_file + "_FFT.nc") + +plot_path = ( + mconfig["paths"]["plot"] + + "/" + + hemis + + "/" + + batch_key + + "/" + + ID_name + + "/B06_correction/" +) +MT.mkdirs_r(plot_path) + +save_path = mconfig["paths"]["work"] + batch_key + "/B06_corrected_separated/" +MT.mkdirs_r(save_path) + + +color_schemes.colormaps2(31, gamma=1) +col_dict = color_schemes.rels + + +def dict_weighted_mean(Gdict, weight_key): + """ + returns the weighted meean of a dict of xarray, data_arrays + weight_key must be in the xr.DataArrays + """ + + akey = list(Gdict.keys())[0] + GSUM = Gdict[akey].copy() + GSUM.data = np.zeros(GSUM.shape) + N_per_stancil = GSUM.N_per_stancil * 0 + N_photons = np.zeros(GSUM.N_per_stancil.size) + + counter = 0 + for k, I in Gdict.items(): + I = I.squeeze() + print(len(I.x)) + if len(I.x) != 0: + GSUM += I.where(~np.isnan(I), 0) * I[weight_key] + N_per_stancil += I[weight_key] + if "N_photons" in GSUM.coords: + N_photons += I["N_photons"] + counter += 1 + + GSUM = GSUM / N_per_stancil + + if "N_photons" in GSUM.coords: + GSUM.coords["N_photons"] = (("x", "beam"), np.expand_dims(N_photons, 1)) + + GSUM["beam"] = ["weighted_mean"] + GSUM.name = "power_spec" + + return GSUM + + +G_gFT_wmean = (Gk.where(~np.isnan(Gk["gFT_PSD_data"]), 0) * Gk["N_per_stancil"]).sum( + "beam" +) / Gk["N_per_stancil"].sum("beam") +G_gFT_wmean["N_photons"] = Gk["N_photons"].sum("beam") + +G_fft_wmean = (Gfft.where(~np.isnan(Gfft), 0) * Gfft["N_per_stancil"]).sum( + "beam" +) / Gfft["N_per_stancil"].sum("beam") +G_fft_wmean["N_per_stancil"] = Gfft["N_per_stancil"].sum("beam") + + +# plot +# derive spectral errors: +Lpoints = Gk.Lpoints.mean("beam").data +N_per_stancil = 
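# --- illustration: the stencil-count weighted mean over beams used for G_gFT_wmean above ---
# Toy xarray sketch: spectra from several beams are averaged with N_per_stancil as weights,
# after NaNs are set to zero so empty stencils do not poison the weighted sum; the beam names
# and numbers here are made up for the example.
import numpy as np
import xarray as xr

psd = xr.DataArray(np.random.rand(3, 4, 20), dims=("beam", "x", "k"),
                   coords={"beam": ["gt1l", "gt1r", "gt2l"]})                 # toy per-beam spectra
psd[0, 1, :] = np.nan                                                         # a stencil without data
n_per_stancil = xr.DataArray([[120, 0, 80, 90], [100, 60, 70, 95], [110, 50, 60, 85]],
                             dims=("beam", "x"), coords={"beam": psd.beam})

psd_wmean = (psd.where(~np.isnan(psd), 0) * n_per_stancil).sum("beam") / n_per_stancil.sum("beam")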
Gk.N_per_stancil.mean("beam").data # [0:-2] + +G_error_model = dict() +G_error_data = dict() + +for bb in Gk.beam.data: + I = Gk.sel(beam=bb) + b_bat_error = np.concatenate([I.model_error_k_cos.data, I.model_error_k_sin.data]) + Z_error = gFT.complex_represenation(b_bat_error, Gk.k.size, Lpoints) + PSD_error_data, PSD_error_model = gFT.Z_to_power_gFT( + Z_error, np.diff(Gk.k)[0], N_per_stancil, Lpoints + ) + + G_error_model[bb] = xr.DataArray( + data=PSD_error_model, + coords=I.drop("N_per_stancil").coords, + name="gFT_PSD_data_error", + ).expand_dims("beam") + G_error_data[bb] = xr.DataArray( + data=PSD_error_data, + coords=I.drop("N_per_stancil").coords, + name="gFT_PSD_data_error", + ).expand_dims("beam") + +gFT_PSD_data_error_mean = xr.concat(G_error_model.values(), dim="beam") +gFT_PSD_data_error_mean = xr.concat(G_error_data.values(), dim="beam") + +gFT_PSD_data_error_mean = ( + gFT_PSD_data_error_mean.where(~np.isnan(gFT_PSD_data_error_mean), 0) + * Gk["N_per_stancil"] +).sum("beam") / Gk["N_per_stancil"].sum("beam") +gFT_PSD_data_error_mean = ( + gFT_PSD_data_error_mean.where(~np.isnan(gFT_PSD_data_error_mean), 0) + * Gk["N_per_stancil"] +).sum("beam") / Gk["N_per_stancil"].sum("beam") + +G_gFT_wmean["gFT_PSD_data_err"] = gFT_PSD_data_error_mean +G_gFT_wmean["gFT_PSD_data_err"] = gFT_PSD_data_error_mean + +Gk["gFT_PSD_data_err"] = xr.concat(G_error_model.values(), dim="beam") +Gk["gFT_PSD_data_err"] = xr.concat(G_error_data.values(), dim="beam") + + +# + +G_gFT_smth = ( + G_gFT_wmean["gFT_PSD_data"].rolling(k=30, center=True, min_periods=1).mean() +) +G_gFT_smth["N_photons"] = G_gFT_wmean.N_photons +G_gFT_smth["N_per_stancil_fraction"] = Gk["N_per_stancil"].T.mean( + "beam" +) / Gk.Lpoints.mean("beam") + +k = G_gFT_smth.k + +F = M.figure_axis_xy() + +plt.loglog(k, G_gFT_smth / k) + +plt.title("displacement power Spectra", loc="left") + + +def define_noise_wavenumber_tresh_simple( + data_xr, k_peak, k_end_lim=None, plot_flag=False +): + """ + returns noise wavenumber on the high end of a spectral peak. This method fits a straight line in loglog speace using robust regression. + The noise level is defined as the wavenumber at which the residual error of a linear fit to the data is minimal. 
+ + inputs: + data_xr xarray.Dataarray with the power spectra with k as dimension + k_peak wavenumber above which the searh should start + dk the intervall over which the regrssion is repeated + + returns: + k_end the wavenumber at which the spectrum flattens + m slope of the fitted line + b intersect of the fitted line + """ + from scipy.ndimage.measurements import label + + if k_end_lim is None: + k_end_lim = data_xr.k[-1] + + k_lead_peak_margin = k_peak * 1.05 + try: + data_log = ( + np.log(data_xr) + .isel(k=(data_xr.k > k_lead_peak_margin)) + .rolling(k=10, center=True, min_periods=1) + .mean() + ) + + except: + data_log = ( + np.log(data_xr) + .isel(k=(data_xr.k > k_lead_peak_margin / 2)) + .rolling(k=10, center=True, min_periods=1) + .mean() + ) + + k_log = np.log(data_log.k) + try: + d_grad = ( + data_log.differentiate("k").rolling(k=40, center=True, min_periods=4).mean() + ) + except: + d_grad = ( + data_log.differentiate("k").rolling(k=20, center=True, min_periods=2).mean() + ) + ll = label(d_grad >= -5) + + if ll[0][0] != 0: + print("no decay, set to peak") + return k_peak + + if sum(ll[0]) == 0: + k_end = d_grad.k[-1] + else: + k_end = d_grad.k[(ll[0] == 1)][0].data + + if plot_flag: + plt.plot(np.log(data_xr.k), np.log(data_xr)) + plt.plot(k_log, data_log) + plt.plot([np.log(k_end), np.log(k_end)], [-6, -5]) + return k_end + + +# new version +def get_correct_breakpoint(pw_results): + br_points = list() + for i in pw_results.keys(): + [br_points.append(i) if "breakpoint" in i else None] + br_points_df = pw_results[br_points] + br_points_sorted = br_points_df.sort_values() + + alphas_sorted = [ + i.replace("breakpoint", "alpha") for i in br_points_df.sort_values().index + ] + alphas_sorted.append("alpha" + str(len(alphas_sorted) + 1)) + + betas_sorted = [ + i.replace("breakpoint", "beta") for i in br_points_df.sort_values().index + ] + + # betas_sorted + alphas_v2 = list() + alpha_i = pw_results["alpha1"] + for i in [0] + list(pw_results[betas_sorted]): + alpha_i += i + alphas_v2.append(alpha_i) + + alphas_v2_sorted = pd.Series(index=alphas_sorted, data=alphas_v2) + br_points_sorted["breakpoint" + str(br_points_sorted.size + 1)] = "end" + + print("all alphas") + print(alphas_v2_sorted) + slope_mask = alphas_v2_sorted < 0 + + if sum(slope_mask) == 0: + print("no negative slope found, set to lowest") + breakpoint = "start" + else: + # take steepest slope + alpah_v2_sub = alphas_v2_sorted[slope_mask] + print(alpah_v2_sub) + print(alpah_v2_sub.argmin()) + break_point_name = alpah_v2_sub.index[alpah_v2_sub.argmin()].replace( + "alpha", "breakpoint" + ) + + # take first slope + breakpoint = br_points_sorted[break_point_name] + + return breakpoint + + +def get_breakingpoints(xx, dd): + import piecewise_regression + + x2, y2 = xx, dd + convergence_flag = True + n_breakpoints = 3 + while convergence_flag: + pw_fit = piecewise_regression.Fit(x2, y2, n_breakpoints=n_breakpoints) + print("n_breakpoints", n_breakpoints, pw_fit.get_results()["converged"]) + convergence_flag = not pw_fit.get_results()["converged"] + n_breakpoints += 1 + if n_breakpoints >= 4: + convergence_flag = False + + pw_results = pw_fit.get_results() + + if pw_results["converged"]: + pw_results_df = pd.DataFrame(pw_results["estimates"]).loc["estimate"] + + breakpoint = get_correct_breakpoint(pw_results_df) + + return pw_fit, breakpoint + + else: + return pw_fit, False + + +def define_noise_wavenumber_piecewise(data_xr, plot_flag=False): + data_log = data_xr + data_log = np.log(data_xr) + + k = data_log.k.data + k_log = 
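# --- illustration: locating the noise cut-off with piecewise (segmented) regression ---
# Toy sketch of what get_breakingpoints() does above: fit straight-line segments to the
# spectrum in log-log space with the piecewise_regression package and read off the fitted
# breakpoint; the spectrum below is synthetic (a k^-3 decay running into a flat noise floor).
import numpy as np
import piecewise_regression

k = np.logspace(-2, 0, 200)
power = k**-3 + 5.0                                    # toy spectrum: decay + noise floor
x_log, y_log = np.log(k), np.log(power)

pw_fit = piecewise_regression.Fit(list(x_log), list(y_log), n_breakpoints=1)
results = pw_fit.get_results()
if results["converged"]:
    k_break = np.exp(results["estimates"]["breakpoint1"]["estimate"])
    print("spectrum flattens near k =", k_break)       # wavenumber used as the k_lim cut-off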
np.log(k) + + pw_fit, breakpoint_log = get_breakingpoints(k_log, data_log.data) + + if breakpoint_log is "start": + print("no decay, set to lowerst wavenumber") + breakpoint_log = k_log[0] + if (breakpoint_log is "end") | (breakpoint_log is False): + print("higest wavenumner") + breakpoint_log = k_log[-1] + + breakpoint_pos = abs(k_log - breakpoint_log).argmin() + breakpoint_k = k[breakpoint_pos] + + if plot_flag: + pw_fit.plot() + plt.plot(k_log, data_log) + + return breakpoint_k, pw_fit + + +k_lim_list = list() +k_end_previous = np.nan +x = G_gFT_smth.x.data[0] +k = G_gFT_smth.k.data + +for x in G_gFT_smth.x.data: + print(x) + # use displacement power spectrum + k_end, pw_fit = define_noise_wavenumber_piecewise( + G_gFT_smth.sel(x=x) / k, plot_flag=False + ) + + k_save = k_end_previous if k_end == k[0] else k_end + k_end_previous = k_save + k_lim_list.append(k_save) + print("--------------------------") + +font_for_pres() +G_gFT_smth.coords["k_lim"] = ("x", k_lim_list) +G_gFT_smth.k_lim.plot() +k_lim_smth = G_gFT_smth.k_lim.rolling(x=3, center=True, min_periods=1).mean() +k_lim_smth.plot(c="r") + +plt.title("k_c filter", loc="left") +F.save_light(path=plot_path, name=str(ID_name) + "_B06_atten_ov") + +G_gFT_smth["k_lim"] = k_lim_smth +G_gFT_wmean.coords["k_lim"] = k_lim_smth + +font_for_print() + +fn = copy.copy(lstrings) +F = M.figure_axis_xy( + fig_sizes["two_column"][0], + fig_sizes["two_column"][0] * 0.9, + container=True, + view_scale=1, +) + + +plt.suptitle( + "Cut-off Frequency for Displacement Spectral\n" + io.ID_to_str(ID_name), y=0.97 +) +gs = GridSpec(8, 3, wspace=0.1, hspace=1.5) + +k_lims = G_gFT_wmean.k_lim +xlims = G_gFT_wmean.k[0], G_gFT_wmean.k[-1] +# +k = high_beams[0] +for pos, k, pflag in zip( + [gs[0:2, 0], gs[0:2, 1], gs[0:2, 2]], high_beams, [True, False, False] +): + ax0 = F.fig.add_subplot(pos) + Gplot = ( + Gk.sel(beam=k) + .isel(x=slice(0, -1)) + .gFT_PSD_data.squeeze() + .rolling(k=20, x=2, min_periods=1, center=True) + .mean() + ) + Gplot = Gplot.where(Gplot["N_per_stancil"] / Gplot["Lpoints"] >= 0.1) + alpha_range = iter(np.linspace(1, 0, Gplot.x.data.size)) + for x in Gplot.x.data: + ialpha = next(alpha_range) + plt.loglog( + Gplot.k, + Gplot.sel(x=x) / Gplot.k, + linewidth=0.5, + color=color_schemes.rels[k], + alpha=ialpha, + ) + ax0.axvline( + k_lims.sel(x=x), linewidth=0.4, color="black", zorder=0, alpha=ialpha + ) + + plt.title(next(fn) + k, color=col_dict[k], loc="left") + plt.xlim(xlims) + # + if pflag: + ax0.tick_params(labelbottom=False, bottom=True) + plt.ylabel("Power (m$^2$/k')") + plt.legend() + else: + ax0.tick_params(labelbottom=False, bottom=True, labelleft=False) + +for pos, k, pflag in zip( + [gs[2:4, 0], gs[2:4, 1], gs[2:4, 2]], low_beams, [True, False, False] +): + ax0 = F.fig.add_subplot(pos) + Gplot = ( + Gk.sel(beam=k) + .isel(x=slice(0, -1)) + .gFT_PSD_data.squeeze() + .rolling(k=20, x=2, min_periods=1, center=True) + .mean() + ) + + Gplot = Gplot.where(Gplot["N_per_stancil"] / Gplot["Lpoints"] >= 0.1) + + alpha_range = iter(np.linspace(1, 0, Gplot.x.data.size)) + for x in Gplot.x.data: + ialpha = next(alpha_range) + plt.loglog( + Gplot.k, + Gplot.sel(x=x) / Gplot.k, + linewidth=0.5, + color=color_schemes.rels[k], + alpha=ialpha, + ) + ax0.axvline( + k_lims.sel(x=x), linewidth=0.4, color="black", zorder=0, alpha=ialpha + ) + + plt.title(next(fn) + k, color=col_dict[k], loc="left") + plt.xlim(xlims) + plt.xlabel("observed wavenumber k' ") + + if pflag: + ax0.tick_params(bottom=True) + plt.ylabel("Power (m$^2$/k')") + plt.legend() + 
else: + ax0.tick_params(bottom=True, labelleft=False) + +F.save_light(path=plot_path, name=str(ID_name) + "_B06_atten_ov_simple") +F.save_pup(path=plot_path, name=str(ID_name) + "_B06_atten_ov_simple") + +pos = gs[5:, 0:2] +ax0 = F.fig.add_subplot(pos) + +lat_str = ( + str(np.round(Gx.isel(x=0).lat.mean().data, 2)) + + " to " + + str(np.round(Gx.isel(x=-1).lat.mean().data, 2)) +) +plt.title(next(fn) + "Mean Displacement Spectra\n(lat=" + lat_str + ")", loc="left") + +dd = 10 * np.log((G_gFT_smth / G_gFT_smth.k).isel(x=slice(0, -1))) +dd = dd.where(~np.isinf(dd), np.nan) + +## filter out segments with less then 10% of data points +dd = dd.where(G_gFT_smth["N_per_stancil_fraction"] >= 0.1) + +dd_lims = np.round(dd.quantile(0.01).data * 0.95, 0), np.round( + dd.quantile(0.95).data * 1.05, 0 +) +plt.pcolor( + dd.x / 1e3, + dd.k, + dd, + vmin=dd_lims[0], + vmax=dd_lims[-1], + cmap=color_schemes.white_base_blgror, +) +cb = plt.colorbar(orientation="vertical") + +cb.set_label("Power (m$^2$/k)") +plt.plot( + G_gFT_smth.isel(x=slice(0, -1)).x / 1e3, + G_gFT_smth.isel(x=slice(0, -1)).k_lim, + color=color_schemes.black, + linewidth=1, +) +plt.ylabel("wavenumber k") +plt.xlabel("X (km)") + +pos = gs[6:, -1] +ax9 = F.fig.add_subplot(pos) + +plt.title("Data Coverage (%)", loc="left") +plt.plot( + G_gFT_smth.x / 1e3, + G_gFT_smth["N_per_stancil_fraction"] * 100, + linewidth=0.8, + color="black", +) +ax9.spines["left"].set_visible(False) +ax9.spines["right"].set_visible(True) +ax9.tick_params(labelright=True, right=True, labelleft=False, left=False) +ax9.axhline(10, linewidth=0.8, linestyle="--", color="black") +plt.xlabel("X (km)") + + +F.save_light(path=plot_path, name=str(ID_name) + "_B06_atten_ov") +F.save_pup(path=plot_path, name=str(ID_name) + "_B06_atten_ov") + + +# reconstruct slope displacement data +def fit_offset(x, data, model, nan_mask, deg): + p_offset = np.polyfit(x[~nan_mask], data[~nan_mask] - model[~nan_mask], deg) + p_offset[-1] = 0 + poly_offset = np.polyval(p_offset, x) + return poly_offset + + +def tanh_fitler(x, x_cutoff, sigma_g=0.01): + """ + zdgfsg + """ + + decay = 0.5 - np.tanh((x - x_cutoff) / sigma_g) / 2 + return decay + + +def reconstruct_displacement(Gx_1, Gk_1, T3, k_thresh): + """ + reconstructs photon displacement heights for each stancil given the model parameters in Gk_1 + A low-pass frequeny filter can be applied using k-thresh + + inputs: + Gk_1 model data per stencil from _gFT_k file with sin and cos coefficients + Gx_1 real data per stencil from _gFT_x file with mean photon heights and coordindate systems + T3 + k_thresh (None) threshold for low-pass filter + + returns: + height_model reconstucted displements heights of the stancil + poly_offset fitted staight line to the residual between observations and model to account for low-pass variability + nan_mask mask where is observed data in + """ + + dist_stencil = Gx_1.eta + Gx_1.x + dist_stencil_lims = dist_stencil[0].data, dist_stencil[-1].data + + gFT_cos_coeff_sel = np.copy(Gk_1.gFT_cos_coeff) + gFT_sin_coeff_sel = np.copy(Gk_1.gFT_sin_coeff) + + gFT_cos_coeff_sel = gFT_cos_coeff_sel * tanh_fitler(Gk_1.k, k_thresh, sigma_g=0.003) + gFT_sin_coeff_sel = gFT_sin_coeff_sel * tanh_fitler(Gk_1.k, k_thresh, sigma_g=0.003) + + FT_int = gFT.generalized_Fourier(Gx_1.eta + Gx_1.x, None, Gk_1.k) + _ = FT_int.get_H() + FT_int.p_hat = np.concatenate( + [-gFT_sin_coeff_sel / Gk_1.k, gFT_cos_coeff_sel / Gk_1.k] + ) + + dx = Gx.eta.diff("eta").mean().data + height_model = FT_int.model() / dx + dist_nanmask = 
np.isnan(Gx_1.y_data) + height_data = np.interp( + dist_stencil, T3_sel["dist"], T3_sel["heights_c_weighted_mean"] + ) + return height_model, np.nan, dist_nanmask + + +# cutting Table data +G_height_model = dict() +k = "gt2l" +for bb in Gx.beam.data: + G_height_model_temp = dict() + for i in np.arange(Gx.x.size): + Gx_1 = Gx.isel(x=i).sel(beam=bb) + Gk_1 = Gk.isel(x=i).sel(beam=bb) + k_thresh = G_gFT_smth.k_lim.isel(x=0).data + + dist_stencil = Gx_1.eta + Gx_1.x + dist_stencil_lims = dist_stencil[0].data, dist_stencil[-1].data + dist_stencil_lims_plot = dist_stencil_lims + dist_stencil_lims_plot = Gx_1.eta[0] * 1 + Gx_1.x, Gx_1.eta[-1] * 1 + Gx_1.x + + T3_sel = B3[k].loc[ + ( + (B3[k]["dist"] >= dist_stencil_lims[0]) + & (B3[k]["dist"] <= dist_stencil_lims[1]) + ) + ] + + if T3_sel.shape[0] != 0: + height_model, poly_offset, dist_nanmask = reconstruct_displacement( + Gx_1, Gk_1, T3_sel, k_thresh=k_thresh + ) + poly_offset = poly_offset * 0 + G_height_model_temp[str(i) + bb] = xr.DataArray( + height_model, coords=Gx_1.coords, dims=Gx_1.dims, name="height_model" + ) + else: + G_height_model_temp[str(i) + bb] = xr.DataArray( + Gx_1.y_model.data, + coords=Gx_1.coords, + dims=Gx_1.dims, + name="height_model", + ) + + G_height_model[bb] = xr.concat(G_height_model_temp.values(), dim="x").T + +Gx["height_model"] = xr.concat(G_height_model.values(), dim="beam").transpose( + "eta", "beam", "x" +) + +Gx_v2, B2_v2, B3_v2 = dict(), dict(), dict() +for bb in Gx.beam.data: + print(bb) + Gx_k = Gx.sel(beam=bb) + Gh = Gx["height_model"].sel(beam=bb).T + Gh_err = Gx_k["model_error_x"].T + Gnans = np.isnan(Gx_k.y_model) + + concented_heights = Gh.data.reshape(Gh.data.size) + concented_err = Gh_err.data.reshape(Gh.data.size) + concented_nans = Gnans.data.reshape(Gnans.data.size) + concented_x = (Gh.x + Gh.eta).data.reshape(Gh.data.size) + + dx = Gh.eta.diff("eta")[0].data + continous_x_grid = np.arange(concented_x.min(), concented_x.max(), dx) + continous_height_model = np.interp(continous_x_grid, concented_x, concented_heights) + concented_err = np.interp(continous_x_grid, concented_x, concented_err) + continous_nans = np.interp(continous_x_grid, concented_x, concented_nans) == 1 + + T3 = B3[bb] + T3 = T3.sort_values("x") + T3 = T3.sort_values("dist") + + T3["heights_c_model"] = np.interp( + T3["dist"], continous_x_grid, continous_height_model + ) + T3["heights_c_model_err"] = np.interp(T3["dist"], continous_x_grid, concented_err) + T3["heights_c_residual"] = T3["heights_c_weighted_mean"] - T3["heights_c_model"] + + B3_v2[bb] = T3 + Gx_v2[bb] = Gx_k + +try: + G_angle = xr.open_dataset(load_path_angle + "/B05_" + ID_name + "_angle_pdf.nc") + + font_for_pres() + + Ga_abs = ( + G_angle.weighted_angle_PDF_smth.isel(angle=G_angle.angle > 0).data + + G_angle.weighted_angle_PDF_smth.isel(angle=G_angle.angle < 0).data[:, ::-1] + ) / 2 + Ga_abs = xr.DataArray( + data=Ga_abs.T, + dims=G_angle.dims, + coords=G_angle.isel(angle=G_angle.angle > 0).coords, + ) + + Ga_abs_front = Ga_abs.isel(x=slice(0, 3)) + Ga_best = (Ga_abs_front * Ga_abs_front.N_data).sum("x") / Ga_abs_front.N_data.sum( + "x" + ) + + theta = Ga_best.angle[Ga_best.argmax()].data + theta_flag = True + + font_for_print() + F = M.figure_axis_xy(3, 5, view_scale=0.7) + + plt.subplot(2, 1, 1) + plt.pcolor(Ga_abs) + plt.xlabel("abs angle") + plt.ylabel("x") + + ax = plt.subplot(2, 1, 2) + Ga_best.plot() + plt.title("angle front " + str(theta * 180 / np.pi), loc="left") + ax.axvline(theta, color="red") + F.save_light(path=plot_path, name="B06_angle_def") 
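
An aside on the angle correction applied just after this try/except: the observed along-track wavenumbers in Gk.k are projected onto the estimated propagation direction theta via lam = lam_p * cos(theta), i.e. k_corrected = k / cos(theta). A minimal, self-contained numeric sketch of that projection (theta and the wavenumbers are hypothetical):

    import numpy as np

    theta = 35 * np.pi / 180                     # hypothetical best-guess incident angle (rad)
    k_observed = np.array([0.02, 0.04, 0.08])    # along-track wavenumbers (rad/m)

    lam_p = 2 * np.pi / k_observed               # apparent (along-track) wavelengths
    lam = lam_p * np.cos(theta)                  # wavelengths along the propagation direction
    k_corrected = 2 * np.pi / lam                # equals k_observed / cos(theta)

    print(np.allclose(k_corrected, k_observed / np.cos(theta)))   # True
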
+except: + print("no angle data found, skip angle corretion") + theta = 0 + theta_flag = False + +# %% +lam_p = 2 * np.pi / Gk.k +lam = lam_p * np.cos(theta) + +if theta_flag: + k_corrected = 2 * np.pi / lam + x_corrected = Gk.x * np.cos(theta) +else: + k_corrected = 2 * np.pi / lam * np.nan + x_corrected = Gk.x * np.cos(theta) * np.nan + +# spectral save +G5 = G_gFT_wmean.expand_dims(dim="beam", axis=1) +G5.coords["beam"] = ["weighted_mean"] +G5 = G5.assign_coords(N_photons=G5.N_photons) +G5["N_photons"] = G5["N_photons"].expand_dims("beam") +G5["N_per_stancil_fraction"] = G5["N_per_stancil_fraction"].expand_dims("beam") + +Gk_v2 = xr.merge([Gk, G5]) + +Gk_v2 = Gk_v2.assign_coords(x_corrected=("x", x_corrected.data)).assign_coords( + k_corrected=("k", k_corrected.data) +) + +Gk_v2.attrs["best_guess_incident_angle"] = theta + +# save collected spectral data +Gk_v2.to_netcdf(save_path + "/B06_" + ID_name + "_gFT_k_corrected.nc") +Gx +# save real space data +Gx.to_netcdf(save_path + "/B06_" + ID_name + "_gFT_x_corrected.nc") +try: + io.save_pandas_table( + B2_v2, "B06_" + ID_name + "_B06_corrected_resid", save_path + ) # all photos but heights adjusted and with distance coordinate +except: + os.remove(save_path + "B06_" + ID_name + "_B06_corrected_resid.h5") + io.save_pandas_table( + B2_v2, "B06_" + ID_name + "_B06_corrected_resid", save_path + ) # all photos but heights adjusted and with distance coordinate + +try: + io.save_pandas_table( + B3_v2, "B06_" + ID_name + "_binned_resid", save_path + ) # regridding heights +except: + os.remove(save_path + "B06_" + ID_name + "_binned_resid.h5") + io.save_pandas_table( + B3_v2, "B06_" + ID_name + "_binned_resid", save_path + ) # regridding heights + +MT.json_save( + "B06_success", + plot_path + "../", + {"time": time.asctime(time.localtime(time.time()))}, +) +print("done. 
saved target at " + plot_path + "../B06_success") From d586a66c64953d7944bcb73ede2348e60803c876 Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Thu, 18 Jan 2024 10:40:26 -0500 Subject: [PATCH 03/30] chore: clean up generalized_FT Reorganize imports, remove unused names, simple refactorings --- .../ICEsat2_SI_tools/generalized_FT.py | 22 +++++-------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py b/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py index 3e88f22a..355d50b3 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py @@ -1,8 +1,6 @@ import numpy as np - -import icesat2_tracks.ICEsat2_SI_tools.spectral_estimates as spec -import icesat2_tracks.ICEsat2_SI_tools.lanczos as lanczos import matplotlib.pyplot as plt +from icesat2_tracks.ICEsat2_SI_tools import lanczos, spectral_estimates as spec def rebin(data, dk): @@ -69,7 +67,7 @@ def get_weights_from_data( pars = Spec_fft.set_parameters(flim=np.sqrt(9.81 * k[-1]) / 2 / np.pi) k_max = (pars["f_max"].value * 2 * np.pi) ** 2 / 9.81 - if method is "gaussian": + if method == "gaussian": # simple gaussian weight def gaus(x, x_0, amp, sigma_g): return amp * np.exp(-0.5 * ((x - x_0) / sigma_g) ** 2) @@ -77,7 +75,7 @@ def gaus(x, x_0, amp, sigma_g): weight = gaus(k, k_max, 1, 0.02) ** (1 / 2) params = None - elif method is "parametric": + elif method == "parametric": # JONSWAP weight f = np.sqrt(9.81 * k) / (2 * np.pi) weight = Spec_fft.create_weight(freq=f, plot_flag=False, max_nfev=max_nfev) @@ -136,7 +134,7 @@ def define_weights(stancil, prior, x, y, dx, k, max_nfev, plot_flag=False): return weights normalized to 1, prior_pars used for the next iteration """ - if (type(prior[0]) is bool) and not prior[0]: + if isinstance(prior[0], bool) and not prior[0]: # fit function to data weight, prior_pars = get_weights_from_data( x, y, dx, stancil, k, max_nfev, plot_flag=plot_flag, method="parametric" @@ -246,7 +244,6 @@ def calc_gFT_apply(stancil, prior): windows the data accoding to stencil and applies LS spectrogram returns: stancil center, spectrum for this stencil, number of datapoints in stancil """ - from scipy.signal import detrend import matplotlib.pyplot as plt import time @@ -338,10 +335,7 @@ def calc_gFT_apply(stancil, prior): inverse_stats = FT.get_stats(self.dk, Lpoints_full, print_flag=plot_flag) # add fitting parameters of Prior to stats dict for k, I in prior_pars.items(): - try: - inverse_stats[k] = I.value - except: - inverse_stats[k] = np.nan + inverse_stats[k] = I.value if hasattr(I, "value") else np.nan print("compute time stats : ", time.perf_counter() - ta) @@ -637,7 +631,6 @@ def parceval(self, add_attrs=True, weight_data=False): import copy DATA = self.data - L = self.Lmeters X = self.x def get_stancil_var_apply(stancil): @@ -757,7 +750,6 @@ def __init__(self, x, ydata, k): non_dimensionalize (bool, default=True) if True, then the data and R_data_uncertainty is non-dimensionalized by the std of the data """ import numpy as np - from numpy import linalg self.x, self.ydata, self.k = x, ydata, k self.M = self.k.size # number of wavenumbers @@ -769,7 +761,7 @@ def __init__(self, x, ydata, k): # test if the data is real, not nan and not inf assert np.isrealobj(self.ydata), "data is not real" assert np.isfinite(self.ydata).all(), "data is not finite" - assert np.isnan(self.ydata).all() == False, "data is not nan" + assert not np.isnan(self.ydata).all(), "data is not nan" # data 
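
For orientation on the inversion set up here: generalized_Fourier builds a model ("data") matrix of cosine and sine basis functions evaluated at the irregular photon positions and solves for their coefficients. A minimal least-squares sketch of that idea, assuming a simple [cos | sin] column layout (the actual get_H and solver may order, weight, or regularize the system differently):

    import numpy as np

    rng = np.random.default_rng(0)
    x = np.sort(rng.uniform(0, 2000, 400))       # irregular photon positions (m)
    k = np.array([0.01, 0.03, 0.05])             # model wavenumbers (rad/m)
    y = 0.4 * np.cos(0.03 * x) + 0.2 * np.sin(0.05 * x) + 0.05 * rng.standard_normal(x.size)

    # design matrix: one cosine and one sine column per wavenumber
    H = np.hstack([np.cos(np.outer(x, k)), np.sin(np.outer(x, k))])

    p_hat, *_ = np.linalg.lstsq(H, y, rcond=None)    # 2*len(k) coefficients
    y_model = H @ p_hat

    print(p_hat.round(2))                            # cosine coefficients, then sine coefficients
    print(np.corrcoef(y, y_model)[0, 1] > 0.9)       # the fit should track the synthetic signal
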
matrix def get_H(self, xx=None): @@ -865,7 +857,6 @@ def parceval(self, dk, Nx_full): def get_stats(self, dk, Nx_full, print_flag=False): residual = self.ydata - self.model() - Lmeters = self.x[-1] - self.x[0] pars = { "data_var": self.ydata.var(), "model_var": self.model().var(), @@ -951,7 +942,6 @@ def model_func(self, f, params): def non_dim_spec_model(self, f, f_max, amp, gamma=1, angle_rad=0): import icesat2_tracks.local_modules.JONSWAP_gamma as spectal_models - U = 20 # results are incensitive to U f_true = f * np.cos(angle_rad) model = spectal_models.JONSWAP_default_alt(f_true, f_max, 20, gamma=gamma) model = amp * model / np.nanmean(model) From 6aac4408a3df8b9151539b9bad5a9428a627e8fc Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Mon, 22 Jan 2024 16:14:53 -0500 Subject: [PATCH 04/30] fix: isolate data pull --- src/icesat2_tracks/ICEsat2_SI_tools/io.py | 795 ++++++++++-------- .../analysis_db/B01_SL_load_single_file.py | 39 +- 2 files changed, 468 insertions(+), 366 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/io.py b/src/icesat2_tracks/ICEsat2_SI_tools/io.py index 521e305c..f3349744 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/io.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/io.py @@ -1,41 +1,50 @@ +from sliderule import icesat2 +from icesat2_tracks.ICEsat2_SI_tools import sliderule_converter_tools as sct def init_from_input(arguments): - if (len(arguments) <= 1) | ('-f' in set(arguments) ) : + """ + Initializes the variables track_name, batch_key, and test_flag based on the input arguments. + + Parameters: + arguments (list): A list of input arguments. - track_name='20190605061807_10380310_004_01' - batch_key='SH_batch01' + Returns: + tuple: A tuple containing the values of track_name, batch_key, and test_flag. + """ + + if (len(arguments) <= 1) | ("-f" in set(arguments)): + track_name = "20190605061807_10380310_004_01" + batch_key = "SH_batch01" test_flag = True - print('use standard values') + print("use standard values") else: + track_name = arguments[1] + batch_key = arguments[2] + # $(hemisphere) $(coords) $(config) - track_name=arguments[1] - batch_key =arguments[2] - #$(hemisphere) $(coords) $(config) - - print('read vars from file: ' +str(arguments[1]) ) + print("read vars from file: " + str(arguments[1])) - if (len(arguments) >= 4): - if arguments[3] == 'True': + if len(arguments) >= 4: + if arguments[3] == "True": test_flag = True - elif arguments[3] == 'False': + elif arguments[3] == "False": test_flag = False else: - test_flag= arguments[3] + test_flag = arguments[3] - print('test_flag found, test_flag= '+str(test_flag) ) + print("test_flag found, test_flag= " + str(test_flag)) else: - test_flag=False + test_flag = False print(track_name) - - print('----- batch ='+ batch_key) - print('----- test_flag: ' + str(test_flag)) + print("----- batch =" + batch_key) + print("----- test_flag: " + str(test_flag)) return track_name, batch_key, test_flag -def init_data(ID_name, batch_key, ID_flag, ID_root, prefix ='A01b_ID'): +def init_data(ID_name, batch_key, ID_flag, ID_root, prefix="A01b_ID"): """ Takes inputs and retrieves the ID, track_names that can be loaded, hemis, batch inputs: are the outputs from init_from_input, specifically @@ -49,103 +58,146 @@ def init_data(ID_name, batch_key, ID_flag, ID_root, prefix ='A01b_ID'): """ print(ID_name, batch_key, ID_flag) - hemis, batch = batch_key.split('_') + hemis, batch = batch_key.split("_") if ID_flag: - ID_path = ID_root +'/'+batch_key+'/'+prefix+'/' - ID = json_load( prefix +'_'+ID_name, ID_path ) - 
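
A quick usage sketch for init_from_input defined above, using the same positional arguments the test workflow passes to the analysis scripts (the script name here is a hypothetical placeholder):

    # equivalent to: python <script>.py SH_20190502_05180312 SH_testSLsinglefile2 True
    argv = ["some_analysis_step.py", "SH_20190502_05180312", "SH_testSLsinglefile2", "True"]
    track_name, batch_key, test_flag = init_from_input(argv)
    print(track_name, batch_key, test_flag)    # SH_20190502_05180312 SH_testSLsinglefile2 True
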
track_names = ID['tracks']['ATL03'] + ID_path = ID_root + "/" + batch_key + "/" + prefix + "/" + ID = json_load(prefix + "_" + ID_name, ID_path) + track_names = ID["tracks"]["ATL03"] else: - track_names = ['ATL03_'+ID_name] - ID = ID_name + track_names = ["ATL03_" + ID_name] + ID = ID_name return ID, track_names, hemis, batch + def ID_to_str(ID_name): from datetime import datetime - IDs = ID_name.split('_') - date = datetime.strptime(IDs[1], '%Y%m%d').strftime('%Y-%m-%d')#.strftime('%m/%d/%Y') - date - return IDs[0] +' ' +date +' granule: ' + IDs[2] + IDs = ID_name.split("_") + date = datetime.strptime(IDs[1], "%Y%m%d").strftime("%Y-%m-%d") + return IDs[0] + " " + date + " granule: " + IDs[2] + + +def get_gdf(ATL03_track_name, params_yapc, maximum_height): + print("Retrieving data from sliderule ...") + gdf = icesat2.atl06p(params_yapc, resources=[ATL03_track_name]) + + if gdf.empty: + raise Exception("Empty Geodataframe. No data could be retrieved.") + + print("Initial data retrieved") + gdf = sct.correct_and_remove_height(gdf, maximum_height) + return gdf + class case_ID: """docstring for case_ID""" + def __init__(self, track_name): import re - track_name_pattern = r'(\D{2}|\d{2})_?(\d{4})(\d{2})(\d{2})(\d{2})?(\d{2})?(\d{2})?_(\d{4})(\d{2})(\d{2})_?(\d{3})?_?(\d{2})?' - case_ID_pattern = r'(\d{4})(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})' - track_name_rx = re.compile(track_name_pattern) - self.hemis,self.YY,self.MM,self.DD,self.HH,self.MN,self.SS,self.TRK,self.CYC,self.GRN,self.RL,self.VRS = track_name_rx.findall(track_name).pop() + track_name_pattern = r"(\D{2}|\d{2})_?(\d{4})(\d{2})(\d{2})(\d{2})?(\d{2})?(\d{2})?_(\d{4})(\d{2})(\d{2})_?(\d{3})?_?(\d{2})?" - if self.hemis == '01': - self.hemis = 'NH' - elif self.hemis == '02': - self.hemis = 'SH' + track_name_rx = re.compile(track_name_pattern) + ( + self.hemis, + self.YY, + self.MM, + self.DD, + self.HH, + self.MN, + self.SS, + self.TRK, + self.CYC, + self.GRN, + self.RL, + self.VRS, + ) = track_name_rx.findall(track_name).pop() + + if self.hemis == "01": + self.hemis = "NH" + elif self.hemis == "02": + self.hemis = "SH" else: self.hemis = self.hemis - #self.hemis = hemis + # self.hemis = hemis self.set() self.track_name_init = track_name def set(self): - block1 = (self.YY,self.MM,self.DD) - block2 = (self.TRK,self.CYC,self.GRN) + block1 = (self.YY, self.MM, self.DD) + block2 = (self.TRK, self.CYC, self.GRN) - self.ID = self.hemis+'_'+''.join(block1) +'_'+ ''.join(block2) + self.ID = self.hemis + "_" + "".join(block1) + "_" + "".join(block2) return self.ID def get_granule(self): - return ''.join((self.TRK,self.CYC,self.GRN)) + return "".join((self.TRK, self.CYC, self.GRN)) def set_dummy(self): - block1 = (self.YY,self.MM,self.DD) - block2 = (self.TRK,self.CYC,self.GRN) + block1 = (self.YY, self.MM, self.DD) + block2 = (self.TRK, self.CYC, self.GRN) - self.ID_dummy = ''.join(block1) +'_'+ ''.join(block2) + self.ID_dummy = "".join(block1) + "_" + "".join(block2) return self.ID_dummy def set_ATL03_trackname(self): - - block1 = (self.YY,self.MM,self.DD) - block1b = (self.HH,self.MN,self.SS) - block2 = (self.TRK,self.CYC,self.GRN) - if self.RL is '': + block1 = (self.YY, self.MM, self.DD) + block1b = (self.HH, self.MN, self.SS) + block2 = (self.TRK, self.CYC, self.GRN) + if self.RL is "": raise ValueError("RL not set") - if self.VRS is '': + if self.VRS is "": raise ValueError("VRS not set") - block3 = (self.RL,self.VRS) + block3 = (self.RL, self.VRS) - self.ID_ATL03 = ''.join(block1) +''.join(block1b) +'_'+ ''.join(block2) +'_'+ 
'_'.join(block3) + self.ID_ATL03 = ( + "".join(block1) + + "".join(block1b) + + "_" + + "".join(block2) + + "_" + + "_".join(block3) + ) return self.ID_ATL03 def set_ATL10_trackname(self): - - block1 = (self.YY,self.MM,self.DD) - block1b = (self.HH,self.MN,self.SS) - block2 = (self.TRK,self.CYC, '01') # granule is alwasy '01' for ATL10 - if self.RL is '': + block1 = (self.YY, self.MM, self.DD) + block1b = (self.HH, self.MN, self.SS) + block2 = (self.TRK, self.CYC, "01") # granule is alwasy '01' for ATL10 + if self.RL is "": raise ValueError("RL not set") - if self.VRS is '': + if self.VRS is "": raise ValueError("VRS not set") - block3 = (self.RL,self.VRS) + block3 = (self.RL, self.VRS) - if self.hemis == 'NH': - hemis = '01' - elif self.hemis == 'SH': - hemis = '02' + if self.hemis == "NH": + hemis = "01" + elif self.hemis == "SH": + hemis = "02" else: hemis = self.hemis - self.ID_ATL10 = hemis+'_'+''.join(block1) +''.join(block1b) +'_'+ ''.join(block2) +'_'+ '_'.join(block3) + self.ID_ATL10 = ( + hemis + + "_" + + "".join(block1) + + "".join(block1b) + + "_" + + "".join(block2) + + "_" + + "_".join(block3) + ) return self.ID_ATL10 -def nsidc_icesat2_get_associated_file(file_list, product, build=True, username=None, password=None): +def nsidc_icesat2_get_associated_file( + file_list, product, build=True, username=None, password=None +): """ THis method returns assocociated files names and paths for files given in file_list for the "product" ICEsat2 product @@ -163,105 +215,104 @@ def nsidc_icesat2_get_associated_file(file_list, product, build=True, username=N import posixpath import os import icesat2_toolkit.utilities - AUXILIARY=False - #product='ATL03' - DIRECTORY= None - FLATTEN=False - TIMEOUT=120 - MODE=0o775 - #file_list = ['ATL07-01_20210301023054_10251001_005_01'] + + AUXILIARY = False + # product='ATL03' + DIRECTORY = None + FLATTEN = False + TIMEOUT = 120 + MODE = 0o775 + # file_list = ['ATL07-01_20210301023054_10251001_005_01'] if build and not (username or password): - urs = 'urs.earthdata.nasa.gov' - username,login,password = netrc.netrc().authenticators(urs) - #-- build urllib2 opener and check credentials + urs = "urs.earthdata.nasa.gov" + username, login, password = netrc.netrc().authenticators(urs) + # -- build urllib2 opener and check credentials if build: - #-- build urllib2 opener with credentials + # -- build urllib2 opener with credentials icesat2_toolkit.utilities.build_opener(username, password) - #-- check credentials + # -- check credentials icesat2_toolkit.utilities.check_credentials() parser = lxml.etree.HTMLParser() - #-- remote https server for ICESat-2 Data - HOST = 'https://n5eil01u.ecs.nsidc.org' - #-- regular expression operator for extracting information from files - rx = re.compile(r'(processed_)?(ATL\d{2})(-\d{2})?_(\d{4})(\d{2})(\d{2})' - r'(\d{2})(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})_(\d{3})_(\d{2})') - #-- regular expression pattern for finding specific files - regex_suffix = '(.*?)$' if AUXILIARY else '(h5)$' - remote_regex_pattern = (r'{0}(-\d{{2}})?_(\d{{4}})(\d{{2}})(\d{{2}})' - r'(\d{{2}})(\d{{2}})(\d{{2}})_({1})({2})({3})_({4})_(\d{{2}})(.*?).{5}') - - # rx = re.compile(r'(processed_)?(ATL\d{2})(-\d{2})?_(\d{4})(\d{2})(\d{2})' - # r'(\d{2})(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})_(\d{3})_(\d{2})(.*?).h5$') - # #-- regular expression pattern for finding specific files - # regex_suffix = '(.*?)$' if AUXILIARY else '(h5)$' - # remote_regex_pattern = (r'{0}(-\d{{2}})?_(\d{{4}})(\d{{2}})(\d{{2}})' - # 
r'(\d{{2}})(\d{{2}})(\d{{2}})_({1})({2})({3})_({4})_(\d{{2}})(.*?).{5}') - - #-- build list of remote files, remote modification times and local files + # -- remote https server for ICESat-2 Data + HOST = "https://n5eil01u.ecs.nsidc.org" + # -- regular expression operator for extracting information from files + rx = re.compile( + r"(processed_)?(ATL\d{2})(-\d{2})?_(\d{4})(\d{2})(\d{2})" + r"(\d{2})(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})_(\d{3})_(\d{2})" + ) + # -- regular expression pattern for finding specific files + regex_suffix = "(.*?)$" if AUXILIARY else "(h5)$" + remote_regex_pattern = ( + r"{0}(-\d{{2}})?_(\d{{4}})(\d{{2}})(\d{{2}})" + r"(\d{{2}})(\d{{2}})(\d{{2}})_({1})({2})({3})_({4})_(\d{{2}})(.*?).{5}" + ) + + # -- build list of remote files, remote modification times and local files original_files = [] remote_files = [] remote_mtimes = [] local_files = [] - remote_names =[] + remote_names = [] for input_file in file_list: - #print(input_file) - #-- extract parameters from ICESat-2 ATLAS HDF5 file name - SUB,PRD,HEM,YY,MM,DD,HH,MN,SS,TRK,CYC,GRN,RL,VRS = \ - rx.findall(input_file).pop() - #-- get directories from remote directory - product_directory = '{0}.{1}'.format(product,RL) - sd = '{0}.{1}.{2}'.format(YY,MM,DD) - PATH = [HOST,'ATLAS',product_directory,sd] - #-- local and remote data directories - remote_dir=posixpath.join(*PATH) - temp=os.path.dirname(input_file) if (DIRECTORY is None) else DIRECTORY - local_dir=os.path.expanduser(temp) if FLATTEN else os.path.join(temp,sd) - #-- create output directory if not currently existing + # print(input_file) + # -- extract parameters from ICESat-2 ATLAS HDF5 file name + SUB, PRD, HEM, YY, MM, DD, HH, MN, SS, TRK, CYC, GRN, RL, VRS = rx.findall( + input_file + ).pop() + # -- get directories from remote directory + product_directory = "{0}.{1}".format(product, RL) + sd = "{0}.{1}.{2}".format(YY, MM, DD) + PATH = [HOST, "ATLAS", product_directory, sd] + # -- local and remote data directories + remote_dir = posixpath.join(*PATH) + temp = os.path.dirname(input_file) if (DIRECTORY is None) else DIRECTORY + local_dir = os.path.expanduser(temp) if FLATTEN else os.path.join(temp, sd) + # -- create output directory if not currently existing # if not os.access(local_dir, os.F_OK): # os.makedirs(local_dir, MODE) - #-- compile regular expression operator for file parameters - args = (product,TRK,CYC,GRN,RL,regex_suffix) + # -- compile regular expression operator for file parameters + args = (product, TRK, CYC, GRN, RL, regex_suffix) R1 = re.compile(remote_regex_pattern.format(*args), re.VERBOSE) - #-- find associated ICESat-2 data file - #-- find matching files (for granule, release, version, track) - colnames,collastmod,colerror=icesat2_toolkit.utilities.nsidc_list(PATH, - build=False, - timeout=TIMEOUT, - parser=parser, - pattern=R1, - sort=True) + # -- find associated ICESat-2 data file + # -- find matching files (for granule, release, version, track) + colnames, collastmod, colerror = icesat2_toolkit.utilities.nsidc_list( + PATH, build=False, timeout=TIMEOUT, parser=parser, pattern=R1, sort=True + ) print(colnames) - #-- print if file was not found + # -- print if file was not found if not colnames: print(colerror) continue - #-- add to lists - for colname,remote_mtime in zip(colnames,collastmod): - #-- save original file to list (expands if getting auxiliary files) + # -- add to lists + for colname, remote_mtime in zip(colnames, collastmod): + # -- save original file to list (expands if getting auxiliary files) 
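
To make the remote file matching above concrete: remote_regex_pattern is specialized for each input granule by substituting product, track, cycle, granule, release and suffix, and the compiled expression is what nsidc_list applies to the NSIDC directory listing. A small sketch with hypothetical identifiers:

    import re

    remote_regex_pattern = (
        r"{0}(-\d{{2}})?_(\d{{4}})(\d{{2}})(\d{{2}})"
        r"(\d{{2}})(\d{{2}})(\d{{2}})_({1})({2})({3})_({4})_(\d{{2}})(.*?).{5}"
    )
    args = ("ATL03", "0518", "03", "12", "005", "(h5)$")   # hypothetical granule identifiers
    R1 = re.compile(remote_regex_pattern.format(*args), re.VERBOSE)

    print(bool(R1.match("ATL03_20190502050734_05180312_005_01.h5")))   # True
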
original_files.append(input_file) - #-- remote and local versions of the file - remote_files.append(posixpath.join(remote_dir,colname)) - local_files.append(os.path.join(local_dir,colname)) + # -- remote and local versions of the file + remote_files.append(posixpath.join(remote_dir, colname)) + local_files.append(os.path.join(local_dir, colname)) remote_mtimes.append(remote_mtime) remote_names.append(colname) - return original_files, remote_files, remote_names #product_directory, sd, + return original_files, remote_files, remote_names # product_directory, sd, + def json_load(name, path, verbose=False): import json import os - full_name= (os.path.join(path,name+ '.json')) - with open(full_name, 'r') as ifile: - data=json.load(ifile) + full_name = os.path.join(path, name + ".json") + + with open(full_name, "r") as ifile: + data = json.load(ifile) if verbose: - print('loaded from: ',full_name) + print("loaded from: ", full_name) return data -def ATL03_download(username,password, dpath, product_directory, sd, file_name): + +def ATL03_download(username, password, dpath, product_directory, sd, file_name): """ inputs: username: username for https://urs.earthdata.nasa.gov @@ -272,102 +323,114 @@ def ATL03_download(username,password, dpath, product_directory, sd, file_name): file_name 'ATL03_20190301010737_09560204_005_01.h5' - filename in subdirectory """ import icesat2_toolkit.utilities - from icesat2_toolkit.read_ICESat2_ATL03 import read_HDF5_ATL03 - HOST = ['https://n5eil01u.ecs.nsidc.org','ATLAS',product_directory,sd, file_name] - # HOST = ['https://n5eil01u.ecs.nsidc.org','ATLAS','ATL03.003','2018.10.14', - # 'ATL03_20181014000347_02350101_003_01.h5'] - print('download to:', dpath+'/'+HOST[-1]) - buffer,error=icesat2_toolkit.utilities.from_nsidc(HOST,username=username, - password=password,local=dpath+'/'+HOST[-1],verbose=True) - #-- raise exception if download error + + HOST = ["https://n5eil01u.ecs.nsidc.org", "ATLAS", product_directory, sd, file_name] + print("download to:", dpath + "/" + HOST[-1]) + buffer, error = icesat2_toolkit.utilities.from_nsidc( + HOST, + username=username, + password=password, + local=dpath + "/" + HOST[-1], + verbose=True, + ) + # -- raise exception if download error if not buffer: raise Exception(error) -def save_pandas_table(table_dict, name , save_path): + +def save_pandas_table(table_dict, name, save_path): import os + if not os.path.exists(save_path): os.makedirs(save_path) import warnings from pandas import HDFStore from pandas.io.pytables import PerformanceWarning - warnings.filterwarnings('ignore',category=PerformanceWarning) - with HDFStore(save_path+'/'+name+'.h5') as store: - for name,table in table_dict.items(): - store[name]=table + warnings.filterwarnings("ignore", category=PerformanceWarning) + + with HDFStore(save_path + "/" + name + ".h5") as store: + for name, table in table_dict.items(): + store[name] = table + -def load_pandas_table_dict(name , save_path): +def load_pandas_table_dict(name, save_path): import warnings from pandas import HDFStore from pandas.io.pytables import PerformanceWarning - warnings.filterwarnings('ignore',category=PerformanceWarning) - return_dict=dict() - with HDFStore(save_path+'/'+name+'.h5') as store: - #print(store) - #print(store.keys()) + warnings.filterwarnings("ignore", category=PerformanceWarning) + + return_dict = dict() + with HDFStore(save_path + "/" + name + ".h5") as store: for k in store.keys(): - return_dict[k[1:]]=store.get(k) + return_dict[k[1:]] = store.get(k) return return_dict -def 
get_beam_hdf_store(ATL03_k): +def get_beam_hdf_store(ATL03_k): import pandas as pd - DD = pd.DataFrame()#columns = ATL03.keys()) + + DD = pd.DataFrame() # columns = ATL03.keys()) for ikey in ATL03_k.keys(): DD[ikey] = ATL03_k[ikey] return DD -def get_beam_var_hdf_store(ATL03_k, ikey): +def get_beam_var_hdf_store(ATL03_k, ikey): import pandas as pd - DD = pd.DataFrame()#columns = ATL03.keys()) + + DD = pd.DataFrame() # columns = ATL03.keys()) DD[ikey] = ATL03_k[ikey] return DD -def write_track_to_HDF5(data_dict, name, path, verbose=False, mode='w'): +def write_track_to_HDF5(data_dict, name, path, verbose=False, mode="w"): import os import h5py - mode = 'w' if mode is None else mode + + mode = "w" if mode is None else mode if not os.path.exists(path): os.makedirs(path) - full_name= (os.path.join(path,name+ '.h5')) + full_name = os.path.join(path, name + ".h5") store = h5py.File(full_name, mode) for k in data_dict.keys(): store1 = store.create_group(k) for kk, I in list(data_dict[k].items()): - store1[kk]=I - #store1.close() + store1[kk] = I + # store1.close() store.close() if verbose: - print('saved at: ' +full_name) + print("saved at: " + full_name) def get_time_for_track(delta_time, atlas_epoch): "returns pandas dataframe" import pandas as pd - import convert_GPS_time as cGPS + import icesat2_tracks.ICEsat2_SI_tools.convert_GPS_time as cGPS + # Conversion of delta_time to a calendar date temp = cGPS.convert_GPS_time(atlas_epoch[0] + delta_time, OFFSET=0.0) - year = temp['year'][:].astype('int') - month = temp['month'][:].astype('int') - day = temp['day'][:].astype('int') - hour = temp['hour'][:].astype('int') - minute = temp['minute'][:].astype('int') - second = temp['second'][:].astype('int') + year = temp["year"][:].astype("int") + month = temp["month"][:].astype("int") + day = temp["day"][:].astype("int") + hour = temp["hour"][:].astype("int") + second = temp["second"][:].astype("int") + + return pd.DataFrame( + {"year": year, "month": month, "day": day, "hour": hour, "second": second} + ) - return pd.DataFrame({'year':year, 'month':month, 'day':day, 'hour':hour, 'second':second}) -def getATL03_beam(fileT, numpy=False, beam='gt1l', maxElev=1e6): +def getATL03_beam(fileT, numpy=False, beam="gt1l", maxElev=1e6): """ returns 'beam' from fileT as pandas table. fillT path of file @@ -377,78 +440,66 @@ def getATL03_beam(fileT, numpy=False, beam='gt1l', maxElev=1e6): """ # Add in a proper description of the function here import h5py - import convert_GPS_time as cGPS import pandas as pd + # Open the file - ATL03 = h5py.File(fileT, 'r') - lons = ATL03[beam+'/heights/lon_ph'][:] - lats = ATL03[beam+'/heights/lat_ph'][:] + ATL03 = h5py.File(fileT, "r") + lons = ATL03[beam + "/heights/lon_ph"][:] + lats = ATL03[beam + "/heights/lat_ph"][:] # Along track distance from equator i think. 
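
As a usage sketch for write_track_to_HDF5 defined above: it expects a dict of per-beam dicts of arrays and writes one HDF5 group per beam (the file name and output directory below are hypothetical):

    import numpy as np

    data_dict = {
        "gt1l": {"heights": np.random.randn(50), "lats": np.linspace(-65.0, -64.9, 50)},
        "gt1r": {"heights": np.random.randn(50), "lats": np.linspace(-65.0, -64.9, 50)},
    }
    write_track_to_HDF5(data_dict, "ATL03_dummy_regridded", "/tmp/icesat2_tracks_test", verbose=True)
    # -> /tmp/icesat2_tracks_test/ATL03_dummy_regridded.h5 with groups gt1l and gt1r
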
- along_track_distance=ATL03[beam+'/heights/dist_ph_along'][:] - across_track_distance=ATL03[beam+'/heights/dist_ph_across'][:] - #dem_h = ATL03[beam+'/geophys_corr/dem_h'][:] - #delta_time_dem_h = ATL03[beam+'/geophys_corr/delta_time'][:] - segment_dist_x=ATL03[beam+'/geolocation/segment_dist_x'][:] - segment_length=ATL03[beam+'/geolocation/segment_length'][:] - segment_id = ATL03[beam+'/geolocation/segment_id'][:] + along_track_distance = ATL03[beam + "/heights/dist_ph_along"][:] + across_track_distance = ATL03[beam + "/heights/dist_ph_across"][:] + segment_dist_x = ATL03[beam + "/geolocation/segment_dist_x"][:] + segment_length = ATL03[beam + "/geolocation/segment_length"][:] + segment_id = ATL03[beam + "/geolocation/segment_id"][:] - delta_time_geolocation = ATL03[beam+'/geolocation/delta_time'][:] - reference_photon_index= ATL03[beam+'/geolocation/reference_photon_index'][:] - ph_index_beg= ATL03[beam+'/geolocation/ph_index_beg'][:] + delta_time_geolocation = ATL03[beam + "/geolocation/delta_time"][:] + reference_photon_index = ATL03[beam + "/geolocation/reference_photon_index"][:] + ph_index_beg = ATL03[beam + "/geolocation/ph_index_beg"][:] - - ph_id_count = ATL03[beam+'/heights/ph_id_count'][:] + ph_id_count = ATL03[beam + "/heights/ph_id_count"][:] # Nathan says it's the number of seconds since the GPS epoch on midnight Jan. 6, 1980 - delta_time = ATL03[beam+'/heights/delta_time'][:] - #podppd_flag=ATL03[beam+'/geolocation/podppd_flag'][:] - - # #Add this value to delta time parameters to compute full gps_seconds - atlas_epoch = ATL03['/ancillary_data/atlas_sdp_gps_epoch'][:] - - # Conversion of delta_time to a calendar date - temp = cGPS.convert_GPS_time(atlas_epoch[0] + delta_time, OFFSET=0.0) - - # Express delta_time relative to start time of granule - #delta_time_granule=delta_time-delta_time[0] - - # year = temp['year'][:].astype('int') - # month = temp['month'][:].astype('int') - # day = temp['day'][:].astype('int') - # hour = temp['hour'][:].astype('int') - # minute = temp['minute'][:].astype('int') - # second = temp['second'][:].astype('int') + delta_time = ATL03[beam + "/heights/delta_time"][:] # Primary variables of interest # Photon height - heights=ATL03[beam+'/heights/h_ph'][:] - #print(heights.shape) + heights = ATL03[beam + "/heights/h_ph"][:] + # print(heights.shape) # Flag for signal confidence # column index: 0=Land; 1=Ocean; 2=SeaIce; 3=LandIce; 4=InlandWater # values: - #-- -1: Events not associated with a specific surface type - #-- 0: noise - #-- 1: buffer but algorithm classifies as background - #-- 2: low - #-- 3: medium - #-- 4: high - - mask_ocean = ATL03[beam+'/heights/signal_conf_ph'][:, 1] > 2 # ocean points medium or high quality - mask_seaice = ATL03[beam+'/heights/signal_conf_ph'][:, 2] > 2 # sea ice points medium or high quality - mask_total = (mask_seaice | mask_ocean) - - if sum(~mask_total) == (ATL03[beam+'/heights/signal_conf_ph'][:, 1]).size: - print('zero photons, lower photon quality to 2 or higher') - mask_ocean = ATL03[beam+'/heights/signal_conf_ph'][:, 1] > 1 # ocean points medium or high quality - mask_seaice = ATL03[beam+'/heights/signal_conf_ph'][:, 2] > 1 # sea ice points medium or high quality - mask_total = (mask_seaice | mask_ocean) - - signal_confidence = ATL03[beam+'/heights/signal_conf_ph'][:, 1:3].max(1) - #print(signal_confidence.shape) - - #return signal_confidence + # -- -1: Events not associated with a specific surface type + # -- 0: noise + # -- 1: buffer but algorithm classifies as background + # -- 2: low + # -- 3: 
medium + # -- 4: high + + mask_ocean = ( + ATL03[beam + "/heights/signal_conf_ph"][:, 1] > 2 + ) # ocean points medium or high quality + mask_seaice = ( + ATL03[beam + "/heights/signal_conf_ph"][:, 2] > 2 + ) # sea ice points medium or high quality + mask_total = mask_seaice | mask_ocean + + if sum(~mask_total) == (ATL03[beam + "/heights/signal_conf_ph"][:, 1]).size: + print("zero photons, lower photon quality to 2 or higher") + mask_ocean = ( + ATL03[beam + "/heights/signal_conf_ph"][:, 1] > 1 + ) # ocean points medium or high quality + mask_seaice = ( + ATL03[beam + "/heights/signal_conf_ph"][:, 2] > 1 + ) # sea ice points medium or high quality + mask_total = mask_seaice | mask_ocean + + signal_confidence = ATL03[beam + "/heights/signal_conf_ph"][:, 1:3].max(1) + # print(signal_confidence.shape) + + # return signal_confidence # Add photon rate and background rate to the reader here ATL03.close() @@ -458,26 +509,45 @@ def getATL03_beam(fileT, numpy=False, beam='gt1l', maxElev=1e6): return along_track_dist, elev else: - dF = pd.DataFrame({'heights':heights, 'lons':lons, 'lats':lats, 'signal_confidence':signal_confidence, 'mask_seaice':mask_seaice, - 'delta_time':delta_time, 'along_track_distance':along_track_distance, #'delta_time_granule':delta_time_granule, - 'across_track_distance':across_track_distance,'ph_id_count':ph_id_count})#, - #'year':year, 'month':month, 'day':day, 'hour':hour,'minute':minute , 'second':second}) - - dF_seg = pd.DataFrame({'delta_time':delta_time_geolocation, 'segment_dist_x':segment_dist_x, 'segment_length':segment_length, 'segment_id':segment_id, - 'reference_photon_index':reference_photon_index, 'ph_index_beg':ph_index_beg}) + dF = pd.DataFrame( + { + "heights": heights, + "lons": lons, + "lats": lats, + "signal_confidence": signal_confidence, + "mask_seaice": mask_seaice, + "delta_time": delta_time, + "along_track_distance": along_track_distance, #'delta_time_granule':delta_time_granule, + "across_track_distance": across_track_distance, + "ph_id_count": ph_id_count, + } + ) # , + #'year':year, 'month':month, 'day':day, 'hour':hour,'minute':minute , 'second':second}) + + dF_seg = pd.DataFrame( + { + "delta_time": delta_time_geolocation, + "segment_dist_x": segment_dist_x, + "segment_length": segment_length, + "segment_id": segment_id, + "reference_photon_index": reference_photon_index, + "ph_index_beg": ph_index_beg, + } + ) # Filter out high elevation values - print('seg_dist shape ', segment_dist_x.shape) - print('df shape ',dF.shape) + print("seg_dist shape ", segment_dist_x.shape) + print("df shape ", dF.shape) dF = dF[mask_total] - #dF_seg = dF_seg[mask_total] - #print('df[mask] shape ',dF.shape) + # dF_seg = dF_seg[mask_total] + # print('df[mask] shape ',dF.shape) # Reset row indexing - #dF=dF#.reset_index(drop=True) + # dF=dF#.reset_index(drop=True) return dF, dF_seg -def getATL03_height_correction(fileT, beam='gt1r'): + +def getATL03_height_correction(fileT, beam="gt1r"): """ This method returns relevant data for wave estimates from ALT 07 tracks. 
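
    A minimal usage sketch (the local file path is hypothetical); the function reads the
    geophys_corr group of an ATL03 granule and returns a DataFrame with 'delta_time' and 'dem_h':

        dF_dem = getATL03_height_correction("data/ATL03_20190502050734_05180312_005_01.h5", beam="gt1r")
        print(dF_dem.columns.tolist())   # ['delta_time', 'dem_h']
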
returns: Pandas data frame @@ -486,25 +556,27 @@ def getATL03_height_correction(fileT, beam='gt1r'): import h5py import pandas as pd + # Open the file - ATL03 = h5py.File(fileT, 'r') + ATL03 = h5py.File(fileT, "r") ### bulk positions and statistics vars_bulk = [ - 'delta_time', # referenc time since equator crossing - 'dem_h', # best giod approxiamtion - ] + "delta_time", # referenc time since equator crossing + "dem_h", # best giod approxiamtion + ] - D_bulk= dict() + D_bulk = dict() for var in vars_bulk: - D_bulk[var] = ATL03[beam+'/geophys_corr/'+var][:] + D_bulk[var] = ATL03[beam + "/geophys_corr/" + var][:] dF_bulk = pd.DataFrame(D_bulk) ATL03.close() return dF_bulk -def getATL07_beam(fileT, beam='gt1r', maxElev=1e6): + +def getATL07_beam(fileT, beam="gt1r", maxElev=1e6): """ This method returns relevant data for wave estimates from ALT 07 tracks. returns: Pandas data frame @@ -513,81 +585,82 @@ def getATL07_beam(fileT, beam='gt1r', maxElev=1e6): import h5py import pandas as pd + # Open the file - ATL07 = h5py.File(fileT, 'r') + ATL07 = h5py.File(fileT, "r") ### bulk positions and statistics vars_bulk = [ - 'longitude', - 'latitude', - 'height_segment_id',# Height segment ID (10 km segments) - 'seg_dist_x' # Along track distance from the equator crossing to the segment center. - ] + "longitude", + "latitude", + "height_segment_id", # Height segment ID (10 km segments) + "seg_dist_x", # Along track distance from the equator crossing to the segment center. + ] - D_bulk= dict() + D_bulk = dict() for var in vars_bulk: - D_bulk[var] = ATL07[beam+'/sea_ice_segments/'+var] + D_bulk[var] = ATL07[beam + "/sea_ice_segments/" + var] dF_bulk = pd.DataFrame(D_bulk) # Nathan says it's the number of seconds since the GPS epoch on midnight Jan. 6, 1980 - delta_time=ATL07[beam+'/sea_ice_segments/delta_time'][:] + delta_time = ATL07[beam + "/sea_ice_segments/delta_time"][:] # #Add this value to delta time parameters to compute full gps_seconds - atlas_epoch=ATL07['/ancillary_data/atlas_sdp_gps_epoch'][:] - dF_time = get_time_for_track(delta_time,atlas_epoch) - dF_time['delta_time'] = delta_time + atlas_epoch = ATL07["/ancillary_data/atlas_sdp_gps_epoch"][:] + dF_time = get_time_for_track(delta_time, atlas_epoch) + dF_time["delta_time"] = delta_time ### Primary variables of interest - vars = [ - 'across_track_distance', #Across track distance of photons averaged over the sea ice height segment. - 'height_segment_asr_calc', #Computed apparent surface reflectance for the sea ice segment. 
- 'height_segment_confidence',# # Height segment confidence flag - 'height_segment_fit_quality_flag', # Flag Values: ['-1', '1', '2', '3', '4', '5'] - #Flag Meanings: ['invalid', 'best', 'high', 'med', 'low', 'poor'] - 'height_segment_height', # Beam segment height - 'height_segment_length_seg', # Along track length of segment - 'height_segment_ssh_flag', #Flag for potential leads, 0=sea ice, 1 = sea surface - 'height_segment_surface_error_est', #Error estimate of the surface height - 'height_segment_type',# Flag Values: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] - # Flag Meanings: ['cloud_covered', 'other', 'specular_lead_low_w_bkg', 'specular_lead_low', 'specular_lead_high_w_bkg', 'specular_lead_high', 'dark_lead_smooth_w_bkg', 'dark_lead_smooth' - 'height_segment_w_gaussian', # Width of Gaussian fit - 'height_segment_quality', # Height quality flag, 1 for good fit, 0 for bad - ] - #vars = ['beam_fb_height', 'beam_fb_sigma' , 'beam_fb_confidence' , 'beam_fb_quality_flag'] - - D_heights=dict() + "across_track_distance", # Across track distance of photons averaged over the sea ice height segment. + "height_segment_asr_calc", # Computed apparent surface reflectance for the sea ice segment. + "height_segment_confidence", # # Height segment confidence flag + "height_segment_fit_quality_flag", # Flag Values: ['-1', '1', '2', '3', '4', '5'] + # Flag Meanings: ['invalid', 'best', 'high', 'med', 'low', 'poor'] + "height_segment_height", # Beam segment height + "height_segment_length_seg", # Along track length of segment + "height_segment_ssh_flag", # Flag for potential leads, 0=sea ice, 1 = sea surface + "height_segment_surface_error_est", # Error estimate of the surface height + "height_segment_type", # Flag Values: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + # Flag Meanings: ['cloud_covered', 'other', 'specular_lead_low_w_bkg', 'specular_lead_low', 'specular_lead_high_w_bkg', 'specular_lead_high', 'dark_lead_smooth_w_bkg', 'dark_lead_smooth' + "height_segment_w_gaussian", # Width of Gaussian fit + "height_segment_quality", # Height quality flag, 1 for good fit, 0 for bad + ] + # vars = ['beam_fb_height', 'beam_fb_sigma' , 'beam_fb_confidence' , 'beam_fb_quality_flag'] + + D_heights = dict() for var in vars: - D_heights[var] = ATL07[beam+'/sea_ice_segments/heights/' +var][:] + D_heights[var] = ATL07[beam + "/sea_ice_segments/heights/" + var][:] dF_heights = pd.DataFrame(D_heights) - vars_env = { - 'mss':'geophysical/height_segment_mss', # Mean sea surface height above WGS-84 reference ellipsoid (range: -105 to 87m), based on the DTU13 model. - 't2m':'geophysical/height_segment_t2m',#Temperature at 2m above the displacement height (K) - 'u2m':'geophysical/height_segment_u2m',#Eastward wind at 2m above the displacement height (m/s-1) - 'v2m':'geophysical/height_segment_v2m',#Northward wind at 2m above the displacement height (m/s-1) - 'n_photons_actual':'stats/n_photons_actual', # Number of photons gathered - 'photon_rate':'stats/photon_rate', #photon_rate + "mss": "geophysical/height_segment_mss", # Mean sea surface height above WGS-84 reference ellipsoid (range: -105 to 87m), based on the DTU13 model. 
+ "t2m": "geophysical/height_segment_t2m", # Temperature at 2m above the displacement height (K) + "u2m": "geophysical/height_segment_u2m", # Eastward wind at 2m above the displacement height (m/s-1) + "v2m": "geophysical/height_segment_v2m", # Northward wind at 2m above the displacement height (m/s-1) + "n_photons_actual": "stats/n_photons_actual", # Number of photons gathered + "photon_rate": "stats/photon_rate", # photon_rate } - D_env=dict() - for var,I in vars_env.items(): - D_env[ var] = ATL07[beam+'/sea_ice_segments/' +I][:] + D_env = dict() + for var, I in vars_env.items(): + D_env[var] = ATL07[beam + "/sea_ice_segments/" + I][:] dF_env = pd.DataFrame(D_env) - - #Df = pd.concat({k: pd.DataFrame(v).T for k, v in data.items()}, axis=0) - DF = pd.concat({ 'time': dF_time, 'ref': dF_bulk, 'heights': dF_heights, 'env': dF_env }, axis=1) + # Df = pd.concat({k: pd.DataFrame(v).T for k, v in data.items()}, axis=0) + DF = pd.concat( + {"time": dF_time, "ref": dF_bulk, "heights": dF_heights, "env": dF_env}, axis=1 + ) ATL07.close() # Filter out high elevation values - DF = DF[(DF['heights']['height_segment_height'] Date: Mon, 22 Jan 2024 16:36:42 -0500 Subject: [PATCH 05/30] fix: use better name for wrapper --- src/icesat2_tracks/ICEsat2_SI_tools/io.py | 33 ++++++++++--------- .../analysis_db/B01_SL_load_single_file.py | 2 +- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/io.py b/src/icesat2_tracks/ICEsat2_SI_tools/io.py index f3349744..193229de 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/io.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/io.py @@ -80,7 +80,18 @@ def ID_to_str(ID_name): return IDs[0] + " " + date + " granule: " + IDs[2] -def get_gdf(ATL03_track_name, params_yapc, maximum_height): +def get_atl06p(ATL03_track_name, params_yapc, maximum_height): + """ + This method retrieves the ATL06 data from sliderule and returns a geodataframe. It also applies the corrections and removes the points above the maximum height. If the geodataframe is empty, an exception is raised. + + Parameters: + ATL03_track_name (str): The name of the ATL03 track. + params_yapc (dict): The parameters for the YAPC correction. + maximum_height (float): The maximum height to filter out. + + Returns: + geopandas.GeoDataFrame: The geodataframe containing the ATL06 data. 
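
    Example (a sketch; the granule name is hypothetical and the parameter dict is
    abbreviated, see B01_SL_load_single_file.py for the exact sliderule/YAPC
    parameters used in production):

        params_yapc = dict()   # sliderule request parameters (see B01 for the full dict)
        gdf = get_atl06p("ATL03_20190502050734_05180312_005_01.h5", params_yapc, maximum_height=30)
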
+ """ print("Retrieving data from sliderule ...") gdf = icesat2.atl06p(params_yapc, resources=[ATL03_track_name]) @@ -122,7 +133,6 @@ def __init__(self, track_name): self.hemis = "SH" else: self.hemis = self.hemis - # self.hemis = hemis self.set() self.track_name_init = track_name @@ -217,12 +227,9 @@ def nsidc_icesat2_get_associated_file( import icesat2_toolkit.utilities AUXILIARY = False - # product='ATL03' DIRECTORY = None FLATTEN = False TIMEOUT = 120 - MODE = 0o775 - # file_list = ['ATL07-01_20210301023054_10251001_005_01'] if build and not (username or password): urs = "urs.earthdata.nasa.gov" @@ -497,11 +504,7 @@ def getATL03_beam(fileT, numpy=False, beam="gt1l", maxElev=1e6): mask_total = mask_seaice | mask_ocean signal_confidence = ATL03[beam + "/heights/signal_conf_ph"][:, 1:3].max(1) - # print(signal_confidence.shape) - - # return signal_confidence - # Add photon rate and background rate to the reader here ATL03.close() if numpy == True: @@ -521,8 +524,7 @@ def getATL03_beam(fileT, numpy=False, beam="gt1l", maxElev=1e6): "across_track_distance": across_track_distance, "ph_id_count": ph_id_count, } - ) # , - #'year':year, 'month':month, 'day':day, 'hour':hour,'minute':minute , 'second':second}) + ) dF_seg = pd.DataFrame( { @@ -781,10 +783,10 @@ def getATL07_height_corrections(fileT, beam="gt1r"): ### Primary variables of interest vars = [ - "height_segment_dac", # - "height_segment_ib", # - "height_segment_lpe", # # - "height_segment_mss", # + "height_segment_dac", + "height_segment_ib", + "height_segment_lpe", + "height_segment_mss", "height_segment_ocean", ] D_heights = dict() @@ -792,7 +794,6 @@ def getATL07_height_corrections(fileT, beam="gt1r"): D_heights[var] = ATL07[beam + "/sea_ice_segments/geophysical/" + var][:] dF_heights = pd.DataFrame(D_heights) - # Df = pd.concat({k: pd.DataFrame(v).T for k, v in data.items()}, axis=0) DF = pd.concat( { "time": dF_time, diff --git a/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py b/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py index 76f0fcc6..8e05ab67 100644 --- a/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py +++ b/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py @@ -86,7 +86,7 @@ maximum_height = 30 # (meters) maximum height past dem_h correction -gdf = io.get_gdf(ATL03_track_name, params_yapc, maximum_height) +gdf = io.get_atl06p(ATL03_track_name, params_yapc, maximum_height) cdict = dict() From 5380bda903994b1c6a1c536528cfe652a8b4be15 Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Mon, 29 Jan 2024 07:51:09 -0500 Subject: [PATCH 06/30] adding step6 to test workflow. 
removing deprecated messages --- .github/workflows/test-B01_SL_load_single_file.yml | 2 ++ src/icesat2_tracks/analysis_db/B05_define_angle.py | 13 ++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test-B01_SL_load_single_file.yml b/.github/workflows/test-B01_SL_load_single_file.yml index cfbf9d7a..df7917a2 100644 --- a/.github/workflows/test-B01_SL_load_single_file.yml +++ b/.github/workflows/test-B01_SL_load_single_file.yml @@ -33,3 +33,5 @@ jobs: run: python src/icesat2_tracks/analysis_db/A02c_IOWAGA_thredds_prior.py SH_20190502_05180312 SH_testSLsinglefile2 True - name: Fifth step B04_angle run: python src/icesat2_tracks/analysis_db/B04_angle.py SH_20190502_05180312 SH_testSLsinglefile2 True + - name: Sixth step B04_angle + run: python src/icesat2_tracks/analysis_db/B05_define_angle.py SH_20190502_05180312 SH_testSLsinglefile2 True diff --git a/src/icesat2_tracks/analysis_db/B05_define_angle.py b/src/icesat2_tracks/analysis_db/B05_define_angle.py index 6a71c395..a50f27b4 100644 --- a/src/icesat2_tracks/analysis_db/B05_define_angle.py +++ b/src/icesat2_tracks/analysis_db/B05_define_angle.py @@ -8,18 +8,17 @@ from icesat2_tracks.config.IceSAT2_startup import ( mconfig, - xr, color_schemes, plt, - font_for_print, - np, + font_for_print ) import icesat2_tracks.ICEsat2_SI_tools.io as io import icesat2_tracks.ICEsat2_SI_tools.spectral_estimates as spec from numba import jit - +import xarray as xr +import numpy as np import time import icesat2_tracks.ICEsat2_SI_tools.lanczos as lanczos import icesat2_tracks.local_modules.m_tools_ph3 as MT @@ -76,7 +75,7 @@ def weighted_means(data, weights, x_angle, color="k"): weights should have nans when there is no data data should have zeros where there is no data """ - from scipy.ndimage.measurements import label + from scipy.ndimage import label # make wavenumber groups groups, Ngroups = label(weights.where(~np.isnan(weights), 0)) @@ -114,10 +113,10 @@ def weighted_means(data, weights, x_angle, color="k"): # makde dummy variables M_final = xr.full_like( - corrected_marginals.isel(k=0, beam_group=0).drop("beam_group").drop("k"), np.nan + corrected_marginals.isel(k=0, beam_group=0).drop_vars("beam_group").drop_vars("k"), np.nan ) M_final_smth = xr.full_like( - corrected_marginals.isel(k=0, beam_group=0).drop("beam_group").drop("k"), np.nan + corrected_marginals.isel(k=0, beam_group=0).drop_vars("beam_group").drop_vars("k"), np.nan ) if M_final.shape[0] > M_final.shape[1]: M_final = M_final.T From ff5f5904851855244ad3a91e2b73aa4fe5ccd4d8 Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Mon, 29 Jan 2024 08:17:04 -0500 Subject: [PATCH 07/30] fixing step6 name in workflow --- .github/workflows/test-B01_SL_load_single_file.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-B01_SL_load_single_file.yml b/.github/workflows/test-B01_SL_load_single_file.yml index df7917a2..54fc432f 100644 --- a/.github/workflows/test-B01_SL_load_single_file.yml +++ b/.github/workflows/test-B01_SL_load_single_file.yml @@ -33,5 +33,5 @@ jobs: run: python src/icesat2_tracks/analysis_db/A02c_IOWAGA_thredds_prior.py SH_20190502_05180312 SH_testSLsinglefile2 True - name: Fifth step B04_angle run: python src/icesat2_tracks/analysis_db/B04_angle.py SH_20190502_05180312 SH_testSLsinglefile2 True - - name: Sixth step B04_angle + - name: Sixth step B05_define_angle run: python src/icesat2_tracks/analysis_db/B05_define_angle.py SH_20190502_05180312 SH_testSLsinglefile2 True From 
2182165cac1df37f5ef21deaaf6a92b5181a3926 Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Mon, 29 Jan 2024 12:16:46 -0500 Subject: [PATCH 08/30] removed duplicated file. move docustring to top of the file --- analysis_db/B05_define_angle.py | 506 ------------------ .../analysis_db/B05_define_angle.py | 5 +- 2 files changed, 2 insertions(+), 509 deletions(-) delete mode 100644 analysis_db/B05_define_angle.py diff --git a/analysis_db/B05_define_angle.py b/analysis_db/B05_define_angle.py deleted file mode 100644 index 05aa1c37..00000000 --- a/analysis_db/B05_define_angle.py +++ /dev/null @@ -1,506 +0,0 @@ - -import os, sys -#execfile(os.environ['PYTHONSTARTUP']) - -""" -This file open a ICEsat2 track applied filters and corections and returns smoothed photon heights on a regular grid in an .nc file. -This is python 3 -""" - -exec(open(os.environ['PYTHONSTARTUP']).read()) -exec(open(STARTUP_2021_IceSAT2).read()) -xr.set_options(display_style='text') -#%matplotlib inline - -import ICEsat2_SI_tools.convert_GPS_time as cGPS -import h5py -import ICEsat2_SI_tools.io as io -import ICEsat2_SI_tools.spectral_estimates as spec - -import imp -import copy -import spicke_remover -import datetime -import concurrent.futures as futures - -from numba import jit - -from ICEsat2_SI_tools import angle_optimizer -import ICEsat2_SI_tools.wave_tools as waves -import concurrent.futures as futures - -import time -import ICEsat2_SI_tools.lanczos as lanczos - - -col.colormaps2(21) - -col_dict = col.rels -#import s3fs -# %% -track_name, batch_key, test_flag = io.init_from_input(sys.argv) # loads standard experiment -#track_name, batch_key, test_flag = '20190605061807_10380310_004_01', 'SH_batch01', False -#track_name, batch_key, test_flag = '20190601094826_09790312_004_01', 'SH_batch01', False -#track_name, batch_key, test_flag = '20190207111114_06260210_004_01', 'SH_batch02', False -#track_name, batch_key, test_flag = '20190219073735_08070210_004_01', 'SH_batch02', False -#track_name, batch_key, test_flag = '20190215184558_07530210_004_01', 'SH_batch02', False - -# good track -#track_name, batch_key, test_flag = '20190502021224_05160312_004_01', 'SH_batch02', False -#track_name, batch_key, test_flag = '20190502050734_05180310_004_01', 'SH_batch02', False -#track_name, batch_key, test_flag = '20190216200800_07690212_004_01', 'SH_batch02', False - -#track_name, batch_key, test_flag = '20190213133330_07190212_004_01', 'SH_batch02', False -#track_name, batch_key, test_flag = '20190219073735_08070210_004_01', 'SH_batch02', False -#track_name, batch_key, test_flag = 'SH_20190224_08800212', 'SH_publish', True - -#print(track_name, batch_key, test_flag) -hemis, batch = batch_key.split('_') -#track_name= '20190605061807_10380310_004_01' -ATlevel= 'ATL03' - - - -plot_path = mconfig['paths']['plot'] + '/'+hemis+'/'+batch_key+'/' + track_name + '/B05_angle/' -MT.mkdirs_r(plot_path) -#bad_track_path =mconfig['paths']['work'] +'bad_tracks/'+ batch_key+'/' -# %% - -all_beams = mconfig['beams']['all_beams'] -high_beams = mconfig['beams']['high_beams'] -low_beams = mconfig['beams']['low_beams'] -beam_groups = mconfig['beams']['groups'] -group_names = mconfig['beams']['group_names'] -#Gfilt = io.load_pandas_table_dict(track_name + '_B01_regridded', load_path) # rhis is the rar photon data - -# load_path = mconfig['paths']['work'] +'/B01_regrid_'+hemis+'/' -# G_binned = io.load_pandas_table_dict(track_name + '_B01_binned' , load_path) # - -load_path = mconfig['paths']['work']+batch_key+'/B02_spectra/' -Gk = xr.load_dataset(load_path+ 
'/B02_'+track_name + '_gFT_k.nc' ) # - -load_path = mconfig['paths']['work'] +batch_key+'/B04_angle/' -Marginals = xr.load_dataset(load_path+ '/B04_'+track_name + '_marginals.nc' ) # - -# %% load prior information -load_path = mconfig['paths']['work']+batch_key+'/A02_prior/' -Prior = MT.load_pandas_table_dict('/A02_'+track_name, load_path)['priors_hindcast'] - -save_path = mconfig['paths']['work'] +batch_key+ '/B04_angle/' - -# font_for_print() -# F = M.figure_axis_xy(5.5, 3, view_scale= 0.8) -# plt.suptitle(track_name) -# ax1 = plt.subplot(2, 1, 1) -# plt.title('Data in Beam', loc= 'left') -# -# xi =1 - -#data = Marginals.isel(x=xi).sel(beam_group= 'group1').marginals -# angle_mask = Marginals.angle[2:-2] -# -#data.T.plot(cmap= plt.cm.OrRd) - -# %% - - -def derive_weights(weights): - weights = (weights-weights.mean())/weights.std() - weights = weights - weights.min() - return weights - -def weighted_means(data, weights, x_angle, color='k'): - """ - weights should have nans when there is no data - data should have zeros where there is no data - """ - from scipy.ndimage.measurements import label - # make wavenumber groups - groups, Ngroups = label(weights.where(~np.isnan(weights), 0) ) - - for ng in np.arange(1, Ngroups+1): - wi = weights[groups == ng] - weight_norm = weights.sum('k') - k = wi.k.data - data_k = data.sel(k=k).squeeze() - data_weight = (data_k * wi) - plt.stairs(data_weight.sum('k')/ weight_norm , x_angle, linewidth=1 , color ='k') - if data_k.k.size > 1: - for k in data_k.k.data: - plt.stairs(data_weight.sel(k=k) / weight_norm, x_angle, color ='gray', alpha =0.5) - - data_weighted_mean = (data.where( (~np.isnan(data)) & (data != 0), np.nan) * weights ).sum('k')/weight_norm - return data_weighted_mean - - - - -# cut out data at the boundary and redistibute variance -angle_mask = Marginals.angle *0 ==0 -angle_mask[0], angle_mask[-1] = False, False -corrected_marginals = Marginals.marginals.isel(angle=angle_mask ) + Marginals.marginals.isel(angle=~angle_mask ).sum('angle')/sum(angle_mask).data - -# get groupweights -# ----------------- thius does not work jet.ckeck with data on server how to get number of data points per stancil -#Gx['x'] = Gx.x - Gx.x[0] - -# makde dummy variables -M_final = xr.full_like(corrected_marginals.isel(k=0, beam_group =0).drop('beam_group').drop('k'), np.nan) -M_final_smth = xr.full_like(corrected_marginals.isel(k=0, beam_group =0).drop('beam_group').drop('k'), np.nan) -if M_final.shape[0] > M_final.shape[1]: - M_final= M_final.T - M_final_smth= M_final_smth.T - corrected_marginals=corrected_marginals.T - -Gweights = corrected_marginals.N_data -Gweights = Gweights/Gweights.max() - -k_mask = corrected_marginals.mean('beam_group').mean('angle') - -xticks_2pi = np.arange(-np.pi, np.pi+np.pi/4, np.pi/4) -xtick_labels_2pi = ['-$\pi$', '-$3\pi/4$', '-$\pi/2$','-$\pi/4$','0','$\pi/4$','$\pi/2$','$3\pi/4$','$\pi$'] - -xticks_pi = np.arange(-np.pi/2, np.pi/2+np.pi/4, np.pi/4) -xtick_labels_pi = ['-$\pi/2$','-$\pi/4$','0','$\pi/4$','$\pi/2$',] - - -font_for_print() -x_list = corrected_marginals.x -for xi in range(x_list.size): - - - F = M.figure_axis_xy(7,3.5, view_scale= 0.8, container = True) - gs = GridSpec(3,2, wspace=0.1, hspace=.8)#figure=fig, - x_str= str(int(x_list[xi]/1e3)) - - plt.suptitle('Weighted marginal PDFs\nx='+ x_str +'\n'+io.ID_to_str(track_name), y= 1.05, x = 0.125, horizontalalignment= 'left') - group_weight = Gweights.isel(x =xi) - - ax_list= dict() - ax_sum = F.fig.add_subplot(gs[1, 1]) - #ax_sum.tick_params(labelbottom=False) - - 
ax_list['sum'] = ax_sum - - data_collect = dict() - for group, gpos in zip(Marginals.beam_group.data, [ gs[0, 0], gs[0, 1], gs[1, 0]] ): - ax0 = F.fig.add_subplot(gpos) - ax0.tick_params(labelbottom=False) - ax_list[group] = ax0 - - data = corrected_marginals.isel(x=xi).sel(beam_group= group) - weights = derive_weights( Marginals.weight.isel(x=xi).sel(beam_group= group) ) - weights = weights**2 - - # derive angle axis - x_angle = data.angle.data - d_angle= np.diff(x_angle)[0] - x_angle = np.insert(x_angle, x_angle.size , x_angle[-1].data + d_angle) - - if ( (~np.isnan(data)).sum().data == 0) | (( ~np.isnan(weights)).sum().data == 0): - data_wmean = data.mean('k') - else: - data_wmean = weighted_means(data, weights, x_angle, color= col_dict[group] ) - plt.stairs(data_wmean , x_angle, color =col_dict[group], alpha =1) - # test if density is correct - # if np.round(np.trapz(data_wmean) * d_angle, 2) < 0.90: - # raise ValueError('weighted mean is not a density anymore') - - plt.title('Marginal PDF '+ group, loc ='left') - plt.sca(ax_sum) - - # if data_collect is None: - # data_collect = data_wmean - # else: - data_collect[group] = data_wmean - #ax0.set_yscale('log') - - - data_collect = xr.concat(data_collect.values(), dim='beam_group') - final_data = (group_weight * data_collect).sum('beam_group')/group_weight.sum('beam_group').data - - plt.sca(ax_sum) - plt.stairs( final_data , x_angle, color = 'k', alpha =1, linewidth =0.8) - ax_sum.set_xlabel('Angle (rad)') - plt.title('Weighted mean over group & wavenumber', loc='left') - - # get relevant priors - for axx in ax_list.values(): - axx.set_ylim(0, final_data.max() * 1.5) - #figureaxx.set_yscale('log') - axx.set_xticks(xticks_pi) - axx.set_xticklabels(xtick_labels_pi) - - try: - ax_list['group3'].set_ylabel('PDF') - ax_list['group1'].set_ylabel('PDF') - ax_list['group3'].tick_params(labelbottom=True) - ax_list['group3'].set_xlabel('Angle (rad)') - except: - pass - - ax_final = F.fig.add_subplot(gs[-1, :]) - plt.title('Final angle PDF', loc='left') - - priors_k = Marginals.Prior_direction[ ~np.isnan(k_mask.isel(x= xi))] - for pk in priors_k: - ax_final.axvline(pk, color =col.cascade2, linewidth= 1, alpha = 0.7) - - plt.stairs( final_data , x_angle, color = 'k', alpha =0.5, linewidth =0.8) - - final_data_smth = lanczos.lanczos_filter_1d(x_angle,final_data, 0.1) - - plt.plot(x_angle[0:-1], final_data_smth, color = 'black', linewidth= 0.8) - - ax_final.axvline( x_angle[0:-1][final_data_smth.argmax()], color =col.orange, linewidth= 1.5, alpha = 1, zorder= 1) - ax_final.axvline( x_angle[0:-1][final_data_smth.argmax()], color =col.black, linewidth= 3.2, alpha = 1, zorder= 0) - - - plt.xlabel('Angle (rad)') - plt.xlim(-np.pi*0.8, np.pi*0.8) - - ax_final.set_xticks(xticks_pi) - ax_final.set_xticklabels(xtick_labels_pi) - - M_final[xi,:] = final_data - M_final_smth[xi, :] = final_data_smth - - F.save_pup(path = plot_path, name = 'B05_weigthed_margnials_x' + x_str) - - - -M_final.name='weighted_angle_PDF' -M_final_smth.name='weighted_angle_PDF_smth' -Gpdf = xr.merge([M_final,M_final_smth]) - -if len(Gpdf.x) < 2: - print('not enough x data, exit') - MT.json_save('B05_fail', plot_path+'../', {'time':time.asctime( time.localtime(time.time()) ) , 'reason': 'not enough x segments'}) - print('exit()') - exit() - -# %% -class plot_polarspectra: - def __init__(self,k, thetas, data, data_type='fraction' ,lims=None, verbose=False): - - """ - data_type either 'fraction' or 'energy', default (fraction) - lims (None) limts of k. 
if None set by the limits of the vector k - """ - self.k =k - self.data =data - self.thetas =thetas - - #self.sample_unit=sample_unit if sample_unit is not None else 'df' - # decided on freq limit - self.lims= lims = [self.k.min(),self.k.max()] if lims is None else lims #1.0 /lims[1], 1.0/ lims[0] - freq_sel_bool=M.cut_nparray(self.k, lims[0], lims[1] ) - - self.min=np.round(np.nanmin(data[freq_sel_bool,:]), 2)#*0.5e-17 - self.max=np.round(np.nanmax(data[freq_sel_bool,:]), 2) - if verbose: - print(str(self.min), str(self.max) ) - - self.klabels=np.linspace(self.min, self.max, 5) #np.arange(10, 100, 20) - - self.data_type=data_type - if data_type == 'fraction': - self.clevs=np.linspace(np.nanpercentile(dir_data.data, 1), np.ceil(self.max* 0.9), 21) - elif data_type == 'energy': - self.ctrs_min=self.min+self.min*.05 - #self.clevs=np.linspace(self.min, self.max, 21) - self.clevs=np.linspace(self.min+self.min*.05, self.max*.60, 21) - - - def linear(self, radial_axis='period', ax=None, cbar_flag=True): - - """ - """ - if ax is None: - ax = plt.subplot(111, polar=True) - #self.title = plt.suptitle(' Polar Spectrum', y=0.95, x=0.5 , horizontalalignment='center') - else: - ax=ax - ax.set_theta_direction(-1) #right turned postive - ax.set_theta_zero_location("W") - - grid=ax.grid(color='k', alpha=.5, linestyle='-', linewidth=.5) - - if self.data_type == 'fraction': - cm=plt.cm.RdYlBu_r #brewer2mpl.get_map( 'RdYlBu','Diverging', 4, reverse=True).mpl_colormap - colorax = ax.contourf(self.thetas,self.k, self.data, self.clevs, cmap=cm, zorder=1)# ,cmap=cm)#, vmin=self.ctrs_min) - elif self.data_type == 'energy': - cm=plt.cm.Paired#brewer2mpl.get_map( 'Paired','Qualitative', 8).mpl_colormap - cm.set_under='w' - cm.set_bad='w' - colorax = ax.contourf(self.thetas,self.k, self.data, self.clevs, cmap=cm, zorder=1)#, vmin=self.ctrs_min) - #divider = make_axes_locatable(ax) - #cax = divider.append_axes("right", size="5%", pad=0.05) - - if cbar_flag: - cbar = plt.colorbar(colorax, fraction=0.046, pad=0.1, orientation="horizontal") - # if self.data_type == 'fraction': - # cbar.set_label('Energy Distribution', rotation=0, fontsize=fontsize) - # elif self.data_type == 'energy': - # cbar.set_label('Energy Density ('+self.unit+')', rotation=0, fontsize=fontsize) - cbar.ax.get_yaxis().labelpad = 30 - cbar.outline.set_visible(False) - #cbar.ticks. 
- clev_tick_names, clev_ticks =MT.tick_formatter(FP.clevs, expt_flag= False, shift= 0, rounder=4, interval=1) - cbar.set_ticks(clev_ticks[::5]) - cbar.set_ticklabels(clev_tick_names[::5]) - self.cbar = cbar - - if (self.lims[-1]- self.lims[0]) > 500: - radial_ticks = np.arange(100, 1600, 300) - else: - radial_ticks = np.arange(100, 800, 100) - xx_tick_names, xx_ticks = MT.tick_formatter( radial_ticks , expt_flag= False, shift= 1, rounder=0, interval=1) - #xx_tick_names, xx_ticks = MT.tick_formatter( np.arange( np.floor(self.k.min()),self.k.max(), 20) , expt_flag= False, shift= 1, rounder=0, interval=1) - xx_tick_names = [' '+str(d)+'m' for d in xx_tick_names] - - ax.set_yticks(xx_ticks[::1]) - ax.set_yticklabels(xx_tick_names[::1]) - - degrange = np.arange(0,360,30) - degrange = degrange[(degrange<=80)| (degrange>=280)] - degrange_label = np.copy(degrange) - degrange_label[degrange_label > 180] = degrange_label[degrange_label > 180] - 360 - - degrange_label = [str(d)+'$^{\circ}$' for d in degrange_label] - - lines, labels = plt.thetagrids(degrange, labels=degrange_label)#, frac = 1.07) - - for line in lines: - #L=line.get_xgridlines - line.set_linewidth(5) - #line.set_linestyle(':') - - #ax.set_yscale('log') - ax.set_ylim(self.lims) - ax.spines['polar'].set_color("none") - ax.set_rlabel_position(87) - self.ax=ax - - -font_for_print() -F = M.figure_axis_xy(6, 5.5, view_scale= 0.7, container = True) -gs = GridSpec(8,6, wspace=0.1, hspace=3.1)#figure=fig, -col.colormaps2(21) - -cmap_spec= plt.cm.ocean_r -clev_spec = np.linspace(-8, -1, 21) *10 - -cmap_angle= col.cascade_r -clev_angle = np.linspace(0, 4, 21) - - -ax1 = F.fig.add_subplot(gs[0:3, :]) -ax1.tick_params(labelbottom=False) - -weighted_spec = (Gk.gFT_PSD_data * Gk.N_per_stancil).sum('beam') /Gk.N_per_stancil.sum('beam') -x_spec = weighted_spec.x/1e3 -k = weighted_spec.k - -xlims = x_spec[0], x_spec[-1] -#weighted_spec.plot() -#clev_spec = np.linspace(-8, -1, 21) *10 -clev_spec = np.linspace(-80, (10* np.log(weighted_spec)).max() * 0.9, 21) - -plt.pcolor(x_spec, k, 10* np.log(weighted_spec),vmin= clev_spec[0], vmax= clev_spec[-1], cmap =cmap_spec ) - - -plt.title(track_name + '\nPower Spectra (m/m)$^2$ k$^{-1}$', loc='left') - -cbar = plt.colorbar( fraction=0.018, pad=0.01, orientation="vertical", label ='Power') -cbar.outline.set_visible(False) -clev_ticks = np.round(clev_spec[::3], 0) -#clev_tick_names, clev_ticks =MT.tick_formatter(clev_spec, expt_flag= False, shift= 0, rounder=1, interval=2) -cbar.set_ticks(clev_ticks) -cbar.set_ticklabels(clev_ticks) - -plt.ylabel('wavenumber $k$') - -#plt.colorbar() -ax2 = F.fig.add_subplot(gs[3:5, :]) -ax2.tick_params(labelleft=True) - -dir_data = Gpdf.interp(x= weighted_spec.x).weighted_angle_PDF_smth.T - -x = Gpdf.x/1e3 -angle = Gpdf.angle -plt.pcolor(x_spec, angle, dir_data , vmin= clev_angle[0], vmax= clev_angle[-1], cmap =cmap_angle) -#plt.contourf(x_spec, angle, dir_data ,clev_angle, cmap =cmap_angle) - -cbar = plt.colorbar( fraction=0.01, pad=0.01, orientation="vertical", label ='Density') -plt.title('Direction PDF', loc='left') - -plt.xlabel('x (km)') -plt.ylabel('angle') - -ax2.set_yticks(xticks_pi) -ax2.set_yticklabels(xtick_labels_pi) - - -x_ticks = np.arange(0, xlims[-1].data, 50) -x_tick_labels, x_ticks = MT.tick_formatter(x_ticks, expt_flag= False, shift= 0, rounder=1, interval=2) - -ax1.set_xticks(x_ticks) -ax2.set_xticks(x_ticks) -ax1.set_xticklabels(x_tick_labels) -ax2.set_xticklabels(x_tick_labels) -ax1.set_xlim(xlims) -ax2.set_xlim(xlims) - - -xx_list = 
np.insert(corrected_marginals.x.data, 0, 0) -x_chunks = spec.create_chunk_boundaries( int(xx_list.size/3), xx_list.size, iter_flag= False ) -x_chunks = x_chunks[:, ::2] -x_chunks[-1, -1] = xx_list.size-1 -#x_chunks#.shape - -for x_pos, gs in zip( x_chunks.T , [ gs[-3:, 0:2], gs[-3:, 2:4], gs[-3:, 4:]] ): - #print( x_pos) - #print( xx_list[x_pos]) - x_range = xx_list[[x_pos[0], x_pos[-1]]] - - ax1.axvline(x_range[0]/1e3, linestyle= ':', color= 'white', alpha = 0.5) - ax1.axvline(x_range[-1]/1e3, color = 'gray', alpha = 0.5) - - ax2.axvline(x_range[0]/1e3, linestyle= ':', color= 'white', alpha = 0.5) - ax2.axvline(x_range[-1]/1e3, color = 'gray', alpha = 0.5) - - - i_spec = weighted_spec.sel(x= slice(x_range[0], x_range[-1]) ) - i_dir = corrected_marginals.sel(x= slice(x_range[0], x_range[-1]) ) - - dir_data = (i_dir * i_dir.N_data).sum([ 'beam_group', 'x'])/ i_dir.N_data.sum([ 'beam_group', 'x']) - lims = dir_data.k[ (dir_data.sum('angle')!=0) ][0].data, dir_data.k[ (dir_data.sum('angle')!=0) ][-1].data - - N_angle = i_dir.angle.size - dir_data2 = dir_data#.where( dir_data.sum('angle') !=0, 1/N_angle/d_angle ) - - plot_data = dir_data2 * i_spec.mean('x') - plot_data = plot_data.rolling(angle =5, k =10).median()#.plot() - - plot_data = plot_data.sel(k=slice(lims[0],lims[-1] ) ) - xx = 2 * np.pi/plot_data.k - - #F = M.figure_axis_xy(5, 4) - #ax = plt.subplot(1, 1, 1, polar=True) - # - if np.nanmax(plot_data.data) != np.nanmin(plot_data.data): - - ax3 = F.fig.add_subplot(gs, polar=True) - FP= plot_polarspectra(xx, plot_data.angle, plot_data, lims=None , verbose= False, data_type= 'fraction') - FP.clevs=np.linspace(np.nanpercentile(plot_data.data, 1), np.round(plot_data.max(), 4), 21) - FP.linear(ax = ax3, cbar_flag=False) - #FP.cbar.set_label('Energy Density ( (m/m)$^2$ k$^{-1}$ deg$^{-1}$ )', rotation=0, fontsize=10) - #plt.show() - -F.save_pup(path = plot_path + '../', name = 'B05_dir_ov') - -# save data -Gpdf.to_netcdf(save_path+ '/B05_'+track_name + '_angle_pdf.nc' ) - -MT.json_save('B05_success', plot_path + '../', {'time':time.asctime( time.localtime(time.time()) )}) diff --git a/src/icesat2_tracks/analysis_db/B05_define_angle.py b/src/icesat2_tracks/analysis_db/B05_define_angle.py index a50f27b4..7a972680 100644 --- a/src/icesat2_tracks/analysis_db/B05_define_angle.py +++ b/src/icesat2_tracks/analysis_db/B05_define_angle.py @@ -1,11 +1,10 @@ -import os, sys - - """ This file open a ICEsat2 track applied filters and corections and returns smoothed photon heights on a regular grid in an .nc file. This is python 3 """ +import sys + from icesat2_tracks.config.IceSAT2_startup import ( mconfig, color_schemes, From 1d75666d173ffeb23c02e7063b294cffeaf112d7 Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Tue, 30 Jan 2024 03:53:22 -0500 Subject: [PATCH 09/30] moving imports to the top of file. 
removing unsued imports --- src/icesat2_tracks/analysis_db/B05_define_angle.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/icesat2_tracks/analysis_db/B05_define_angle.py b/src/icesat2_tracks/analysis_db/B05_define_angle.py index 7a972680..b33c7358 100644 --- a/src/icesat2_tracks/analysis_db/B05_define_angle.py +++ b/src/icesat2_tracks/analysis_db/B05_define_angle.py @@ -15,16 +15,15 @@ import icesat2_tracks.ICEsat2_SI_tools.io as io import icesat2_tracks.ICEsat2_SI_tools.spectral_estimates as spec -from numba import jit import xarray as xr import numpy as np import time import icesat2_tracks.ICEsat2_SI_tools.lanczos as lanczos import icesat2_tracks.local_modules.m_tools_ph3 as MT import icesat2_tracks.local_modules.m_general_ph3 as M - + from matplotlib.gridspec import GridSpec - +from scipy.ndimage import label color_schemes.colormaps2(21) col_dict = color_schemes.rels @@ -74,7 +73,6 @@ def weighted_means(data, weights, x_angle, color="k"): weights should have nans when there is no data data should have zeros where there is no data """ - from scipy.ndimage import label # make wavenumber groups groups, Ngroups = label(weights.where(~np.isnan(weights), 0)) From 5186087aa3b05a40c0a0b034b3377e18a0fd50ca Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Tue, 30 Jan 2024 15:41:19 -0500 Subject: [PATCH 10/30] refactor: io to iotools and step 1 --- .../ICEsat2_SI_tools/beam_stats.py | 210 +++++++++++------- .../ICEsat2_SI_tools/{io.py => iotools.py} | 0 .../analysis_db/B01_SL_load_single_file.py | 2 +- 3 files changed, 133 insertions(+), 79 deletions(-) rename src/icesat2_tracks/ICEsat2_SI_tools/{io.py => iotools.py} (100%) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py b/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py index 7a146e8b..3f815e15 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/beam_stats.py @@ -1,12 +1,13 @@ import numpy as np import pandas as pd import icesat2_tracks.ICEsat2_SI_tools.spectral_estimates as spec -import icesat2_tracks.ICEsat2_SI_tools.io as io_local +import icesat2_tracks.ICEsat2_SI_tools.iotools as io_local import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec -def derive_beam_statistics(Gd, all_beams, Lmeter=10e3, dx =10): + +def derive_beam_statistics(Gd, all_beams, Lmeter=10e3, dx=10): """ this method returns a dict of dataframes with the beam statistics Gd is a dict of beam tables or a hdf5 file @@ -16,18 +17,18 @@ def derive_beam_statistics(Gd, all_beams, Lmeter=10e3, dx =10): """ import h5py - D=dict() + D = dict() for k in all_beams: if isinstance(Gd, dict): - Gi = Gd[k] + Gi = Gd[k] elif isinstance(Gd, h5py.File): - Gi = io_local.get_beam_hdf_store(Gd[k]) + Gi = io_local.get_beam_hdf_store(Gd[k]) else: - print('Gd is neither dict nor hdf5 file') + print("Gd is neither dict nor hdf5 file") break - dd = Gi['h_mean'] - xx = Gi['dist'] + dd = Gi["h_mean"] + xx = Gi["dist"] def get_var(sti): mask = (sti[0] < xx) & (xx <= sti[1]) @@ -39,31 +40,40 @@ def get_N(sti): def get_lat(sti): mask = (sti[0] < xx) & (xx <= sti[1]) - return np.nanmean(Gi['lats'][mask]) + return np.nanmean(Gi["lats"][mask]) - iter_x = spec.create_chunk_boundaries_unit_lengths( Lmeter, [ xx.min(), xx.max()],ov =0, iter_flag= False)[1,:] + iter_x = spec.create_chunk_boundaries_unit_lengths( + Lmeter, [xx.min(), xx.max()], ov=0, iter_flag=False + )[1, :] - stencil_iter = spec.create_chunk_boundaries_unit_lengths( Lmeter, [ xx.min(), xx.max()],ov =0, iter_flag= True) + 
stencil_iter = spec.create_chunk_boundaries_unit_lengths( + Lmeter, [xx.min(), xx.max()], ov=0, iter_flag=True + ) var_list = np.array(list(map(get_var, stencil_iter))) - stencil_iter = spec.create_chunk_boundaries_unit_lengths( Lmeter, [ xx.min(), xx.max()],ov =0, iter_flag= True) - N_list = np.array(list(map(get_N, stencil_iter))) + stencil_iter = spec.create_chunk_boundaries_unit_lengths( + Lmeter, [xx.min(), xx.max()], ov=0, iter_flag=True + ) + N_list = np.array(list(map(get_N, stencil_iter))) - stencil_iter = spec.create_chunk_boundaries_unit_lengths( Lmeter, [ xx.min(), xx.max()],ov =0, iter_flag= True) + stencil_iter = spec.create_chunk_boundaries_unit_lengths( + Lmeter, [xx.min(), xx.max()], ov=0, iter_flag=True + ) lat_list = np.array(list(map(get_lat, stencil_iter))) - # make Dataframe + # make Dataframe df = pd.DataFrame() - df['x'] = iter_x - df['lat'] = lat_list - df['var'] = var_list - df['N'] = N_list * 2* dx / Lmeter - + df["x"] = iter_x + df["lat"] = lat_list + df["var"] = var_list + df["N"] = N_list * 2 * dx / Lmeter + D[k] = df return D -def plot_beam_statistics(D, high_beams, low_beams, col_dict, track_name =None): + +def plot_beam_statistics(D, high_beams, low_beams, col_dict, track_name=None): """ Plots the beam statistics in a 2 x 2 plot D is a dict of dataframes with the beam statistics @@ -84,63 +94,99 @@ def plot_beam_statistics(D, high_beams, low_beams, col_dict, track_name =None): # make 2 x 2 plot ax1 = plt.subplot(gs[0, 0]) for k in high_beams: - plt.plot(D[k]['x']/1e3, np.sqrt(D[k]['var']), '.', color= col_dict[k], markersize=4, label=k) + plt.plot( + D[k]["x"] / 1e3, + np.sqrt(D[k]["var"]), + ".", + color=col_dict[k], + markersize=4, + label=k, + ) - plt.title('high beams std', loc='left') - plt.ylabel('segment std log(m)') - - ax1.set_yscale('log') + plt.title("high beams std", loc="left") + plt.ylabel("segment std log(m)") + + ax1.set_yscale("log") ax2 = plt.subplot(gs[1, 0]) for k in high_beams: - Di = D[k]['N'] - Di[Di ==0] =np.nan - plt.plot(D[k]['x']/1e3, D[k]['N'], '.', color= col_dict[k], markersize=4, label=k) + Di = D[k]["N"] + Di[Di == 0] = np.nan + plt.plot( + D[k]["x"] / 1e3, D[k]["N"], ".", color=col_dict[k], markersize=4, label=k + ) - plt.title('high beams N', loc='left') - plt.xlabel('along track distance (km)') - plt.ylabel('Point Density (m)') + plt.title("high beams N", loc="left") + plt.xlabel("along track distance (km)") + plt.ylabel("Point Density (m)") ax3 = plt.subplot(gs[0, 1]) for k in low_beams: - plt.plot(D[k]['x']/1e3, np.sqrt(D[k]['var']), '.', color= col_dict[k], markersize=4, label=k) + plt.plot( + D[k]["x"] / 1e3, + np.sqrt(D[k]["var"]), + ".", + color=col_dict[k], + markersize=4, + label=k, + ) + + plt.title("low beams std", loc="left") - plt.title('low beams std', loc='left') - - ax3.set_yscale('log') + ax3.set_yscale("log") ax4 = plt.subplot(gs[1, 1]) for k in low_beams: - Di = D[k]['N'] - Di[Di ==0] =np.nan - plt.plot(D[k]['x']/1e3, D[k]['N'], '.', color= col_dict[k], markersize=4, label=k) + Di = D[k]["N"] + Di[Di == 0] = np.nan + plt.plot( + D[k]["x"] / 1e3, D[k]["N"], ".", color=col_dict[k], markersize=4, label=k + ) - plt.title('low beams N', loc='left') - plt.xlabel('along track distance (km)') - #plt.ylabel('Point density (m)') + plt.title("low beams N", loc="left") + plt.xlabel("along track distance (km)") + # plt.ylabel('Point density (m)') ax5 = plt.subplot(gs[0:2, 2]) lat_shift = 0 for k in low_beams: Di = D[k] - plt.scatter(Di['x']/1e3, Di['lat']+lat_shift, s= np.exp(Di['N'] *5) , marker='.', color= 
col_dict[k], label=k, alpha = 0.3) + plt.scatter( + Di["x"] / 1e3, + Di["lat"] + lat_shift, + s=np.exp(Di["N"] * 5), + marker=".", + color=col_dict[k], + label=k, + alpha=0.3, + ) lat_shift = lat_shift + 2 for k in high_beams: Di = D[k] - plt.scatter(Di['x']/1e3, Di['lat']+lat_shift, s= np.exp(Di['N'] *5) , marker='.', color= col_dict[k], label=k, alpha = 0.3) + plt.scatter( + Di["x"] / 1e3, + Di["lat"] + lat_shift, + s=np.exp(Di["N"] * 5), + marker=".", + color=col_dict[k], + label=k, + alpha=0.3, + ) lat_shift = lat_shift + 2 - plt.title('Density in space', loc='left') - plt.ylabel('Latitude (deg)') - plt.xlabel('along track distance (km)') + plt.title("Density in space", loc="left") + plt.ylabel("Latitude (deg)") + plt.xlabel("along track distance (km)") plt.legend() plt.show() + ## plot track stats basics for sliderules ATL06 output -def plot_ATL06_track_data( G2, cdict): + +def plot_ATL06_track_data(G2, cdict): """ Plots the beam statistics in a 3 x 3 plot G2 is a GeoDataFrame from SL (ATL06) @@ -157,42 +203,50 @@ def plot_ATL06_track_data( G2, cdict): ax5 = plt.subplot(gs[1, 2]) ax6 = plt.subplot(gs[2, 2]) - for sp in G2['spot'].unique(): - Gc = G2[G2['spot'] == 1] - - Gc['h_mean_gradient'] = np.gradient(Gc['h_mean']) - ts_config = {'marker': '.', 'markersize': 0.2, 'linestyle': 'none', 'color': cdict[sp], 'alpha': 0.3} - hist_confit = {'density': True, 'color': cdict[sp], 'alpha': 0.3} - - ax1.plot(Gc.geometry.y, Gc['h_mean'], **ts_config) - ax2.plot(Gc.geometry.y, Gc['h_mean_gradient'], **ts_config) - ax3.plot(Gc.geometry.y, Gc['n_fit_photons'], **ts_config) - - Gc['h_mean'].plot.hist(ax=ax4, bins=30, **hist_confit) - Gc['h_mean_gradient'].plot.hist(ax=ax5, bins=np.linspace(-5, 5, 30), **hist_confit) - Gc['rms_misfit'].plot.hist(ax=ax6, bins=30, **hist_confit) - - ax1.set_ylabel('h_mean (m)') - ax2.set_ylabel('slope (m/m)') - ax3.set_ylabel('N Photons') - ax3.set_xlabel('Latitude (degree)') + for sp in G2["spot"].unique(): + Gc = G2[G2["spot"] == 1] + + Gc["h_mean_gradient"] = np.gradient(Gc["h_mean"]) + ts_config = { + "marker": ".", + "markersize": 0.2, + "linestyle": "none", + "color": cdict[sp], + "alpha": 0.3, + } + hist_confit = {"density": True, "color": cdict[sp], "alpha": 0.3} + + ax1.plot(Gc.geometry.y, Gc["h_mean"], **ts_config) + ax2.plot(Gc.geometry.y, Gc["h_mean_gradient"], **ts_config) + ax3.plot(Gc.geometry.y, Gc["n_fit_photons"], **ts_config) + + Gc["h_mean"].plot.hist(ax=ax4, bins=30, **hist_confit) + Gc["h_mean_gradient"].plot.hist( + ax=ax5, bins=np.linspace(-5, 5, 30), **hist_confit + ) + Gc["rms_misfit"].plot.hist(ax=ax6, bins=30, **hist_confit) + + ax1.set_ylabel("h_mean (m)") + ax2.set_ylabel("slope (m/m)") + ax3.set_ylabel("N Photons") + ax3.set_xlabel("Latitude (degree)") ax1.set_xticklabels([]) ax2.set_xticklabels([]) - ax1.axhline(0, color='k', linestyle='-', linewidth=0.8) - ax2.axhline(0, color='k', linestyle='-', linewidth=0.8) + ax1.axhline(0, color="k", linestyle="-", linewidth=0.8) + ax2.axhline(0, color="k", linestyle="-", linewidth=0.8) - ax1.set_title('Height', loc='left') - ax2.set_title('Slope', loc='left') - ax3.set_title('Photons per extend', loc='left') + ax1.set_title("Height", loc="left") + ax2.set_title("Slope", loc="left") + ax3.set_title("Photons per extend", loc="left") - ax4.set_title('Histograms', loc='left') - ax5.set_title('Histograms', loc='left') + ax4.set_title("Histograms", loc="left") + ax5.set_title("Histograms", loc="left") - ax6.set_title('Error Hist.', loc='left') - ax6.set_xlabel('rms_misfit (m)') + ax6.set_title("Error 
Hist.", loc="left") + ax6.set_xlabel("rms_misfit (m)") for axi in [ax4, ax5, ax6]: - axi.set_ylabel('') + axi.set_ylabel("") - return [ax1, ax2, ax3, ax4, ax5, ax6] \ No newline at end of file + return [ax1, ax2, ax3, ax4, ax5, ax6] diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/io.py b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py similarity index 100% rename from src/icesat2_tracks/ICEsat2_SI_tools/io.py rename to src/icesat2_tracks/ICEsat2_SI_tools/iotools.py diff --git a/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py b/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py index 8e05ab67..c656a4f4 100644 --- a/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py +++ b/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py @@ -18,7 +18,7 @@ ) from icesat2_tracks.ICEsat2_SI_tools import ( sliderule_converter_tools as sct, - io, + iotools as io, beam_stats, ) from icesat2_tracks.local_modules import m_tools_ph3 as MT, m_general_ph3 as M From eca286c251e040a95c709559171afae0ed5189f3 Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Tue, 30 Jan 2024 15:52:31 -0500 Subject: [PATCH 11/30] fix: iotools in steps 2-5 --- .../analysis_db/A02c_IOWAGA_thredds_prior.py | 5 ++-- .../analysis_db/B02_make_spectra_gFT.py | 4 +-- .../analysis_db/B03_plot_spectra_ov.py | 25 +++++++++++-------- src/icesat2_tracks/analysis_db/B04_angle.py | 2 +- 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/icesat2_tracks/analysis_db/A02c_IOWAGA_thredds_prior.py b/src/icesat2_tracks/analysis_db/A02c_IOWAGA_thredds_prior.py index 8e4fa11e..1fa8068a 100644 --- a/src/icesat2_tracks/analysis_db/A02c_IOWAGA_thredds_prior.py +++ b/src/icesat2_tracks/analysis_db/A02c_IOWAGA_thredds_prior.py @@ -10,7 +10,7 @@ from siphon.catalog import TDSCatalog from icesat2_tracks.config.IceSAT2_startup import mconfig -import icesat2_tracks.ICEsat2_SI_tools.io as io +import icesat2_tracks.ICEsat2_SI_tools.iotools as io import icesat2_tracks.ICEsat2_SI_tools.wave_tools as waves import icesat2_tracks.local_modules.m_tools_ph3 as MT import icesat2_tracks.local_modules.m_general_ph3 as M @@ -420,6 +420,7 @@ def test_nan_frac(imask): except: target_name = "A02_" + track_name + "_hindcast_fail" + def plot_prior(Prior, axx): angle = Prior["incident_angle"][ "value" @@ -531,7 +532,7 @@ def plot_prior(Prior, axx): ax1.axis("equal") F.save_pup(path=plot_path, name=plot_name + "_hindcast_prior") -except Exception as e: +except Exception as e: print(e) print("print 2nd figure failed") diff --git a/src/icesat2_tracks/analysis_db/B02_make_spectra_gFT.py b/src/icesat2_tracks/analysis_db/B02_make_spectra_gFT.py index 00da0ff5..a9cb6b90 100644 --- a/src/icesat2_tracks/analysis_db/B02_make_spectra_gFT.py +++ b/src/icesat2_tracks/analysis_db/B02_make_spectra_gFT.py @@ -14,7 +14,7 @@ import xarray as xr import h5py -import icesat2_tracks.ICEsat2_SI_tools.io as io +import icesat2_tracks.ICEsat2_SI_tools.iotools as io import icesat2_tracks.ICEsat2_SI_tools.spectral_estimates as spec import time @@ -31,7 +31,7 @@ import tracemalloc -def linear_gap_fill(F,key_lead, key_int): +def linear_gap_fill(F, key_lead, key_int): """ F pd.DataFrame key_lead key in F that determined the independent coordindate diff --git a/src/icesat2_tracks/analysis_db/B03_plot_spectra_ov.py b/src/icesat2_tracks/analysis_db/B03_plot_spectra_ov.py index ca7984d2..bae0de4a 100644 --- a/src/icesat2_tracks/analysis_db/B03_plot_spectra_ov.py +++ b/src/icesat2_tracks/analysis_db/B03_plot_spectra_ov.py @@ -6,14 +6,19 @@ import numpy as np import xarray as 
xr from matplotlib.gridspec import GridSpec -import icesat2_tracks.ICEsat2_SI_tools.io as io +import icesat2_tracks.ICEsat2_SI_tools.iotools as io import icesat2_tracks.ICEsat2_SI_tools.generalized_FT as gFT import icesat2_tracks.local_modules.m_tools_ph3 as MT from icesat2_tracks.local_modules import m_general_ph3 as M -from icesat2_tracks.config.IceSAT2_startup import mconfig, color_schemes, plt, font_for_print +from icesat2_tracks.config.IceSAT2_startup import ( + mconfig, + color_schemes, + plt, + font_for_print, +) track_name, batch_key, test_flag = io.init_from_input( - sys.argv # TODO: Handle via CLI + sys.argv # TODO: Handle via CLI ) # loads standard experiment hemis, batch = batch_key.split("_") @@ -21,7 +26,7 @@ load_file = load_path + "B02_" + track_name plot_path = ( mconfig["paths"]["plot"] + "/" + hemis + "/" + batch_key + "/" + track_name + "/" -) # TODO: Update with pathlib +) # TODO: Update with pathlib MT.mkdirs_r(plot_path) Gk = xr.open_dataset(load_file + "_gFT_k.nc") @@ -481,11 +486,11 @@ def plot_model_eta(D, ax, offset=0, **kargs): dd = Gk_1.gFT_PSD_data.rolling(k=10, min_periods=1, center=True).mean() plt.plot(Gk_1.k, dd, color=col_d[k], linewidth=0.8) # handle the 'All-NaN slice encountered' warning - if np.all(np.isnan(dd.data)): + if np.all(np.isnan(dd.data)): dd_max.append(np.nan) else: dd_max.append(np.nanmax(dd.data)) - + plt.xlim(klim) if lflag: plt.ylabel("$(m/m)^2/k$") @@ -495,11 +500,11 @@ def plot_model_eta(D, ax, offset=0, **kargs): ax11.axvline(k_thresh, linewidth=1, color="gray", alpha=1) ax11.axvspan(k_thresh, klim[-1], color="gray", alpha=0.5, zorder=12) - + if not np.all(np.isnan(dd_max)): - max_vale = np.nanmax(dd_max) - for ax in ax1_list: - ax.set_ylim(0,max_vale * 1.1) + max_vale = np.nanmax(dd_max) + for ax in ax1_list: + ax.set_ylim(0, max_vale * 1.1) ax0 = F.fig.add_subplot(gs[-2:, :]) diff --git a/src/icesat2_tracks/analysis_db/B04_angle.py b/src/icesat2_tracks/analysis_db/B04_angle.py index d4067845..0b1e269b 100644 --- a/src/icesat2_tracks/analysis_db/B04_angle.py +++ b/src/icesat2_tracks/analysis_db/B04_angle.py @@ -14,7 +14,7 @@ import h5py -import icesat2_tracks.ICEsat2_SI_tools.io as io +import icesat2_tracks.ICEsat2_SI_tools.iotools as io import xarray as xr import numpy as np From cf2a1d3548f0434b2d009412d9e51165a0055db7 Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Tue, 30 Jan 2024 16:08:25 -0500 Subject: [PATCH 12/30] refactor: move imports to the top iotools.py module --- .../ICEsat2_SI_tools/iotools.py | 61 +++++-------------- 1 file changed, 14 insertions(+), 47 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py index 193229de..468fb21f 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py @@ -1,5 +1,19 @@ +import os +import re +import json +import warnings +from datetime import datetime +from netrc import netrc +from lxml import etree +from posixpath import join as posixpath_join +from pandas import HDFStore +from pandas.io.pytables import PerformanceWarning +import pandas as pd +import h5py from sliderule import icesat2 from icesat2_tracks.ICEsat2_SI_tools import sliderule_converter_tools as sct +import icesat2_toolkit.utilities +import icesat2_tracks.ICEsat2_SI_tools.convert_GPS_time as cGPS def init_from_input(arguments): @@ -73,8 +87,6 @@ def init_data(ID_name, batch_key, ID_flag, ID_root, prefix="A01b_ID"): def ID_to_str(ID_name): - from datetime import datetime - IDs = 
ID_name.split("_") date = datetime.strptime(IDs[1], "%Y%m%d").strftime("%Y-%m-%d") return IDs[0] + " " + date + " granule: " + IDs[2] @@ -107,8 +119,6 @@ class case_ID: """docstring for case_ID""" def __init__(self, track_name): - import re - track_name_pattern = r"(\D{2}|\d{2})_?(\d{4})(\d{2})(\d{2})(\d{2})?(\d{2})?(\d{2})?_(\d{4})(\d{2})(\d{2})_?(\d{3})?_?(\d{2})?" track_name_rx = re.compile(track_name_pattern) @@ -219,12 +229,6 @@ def nsidc_icesat2_get_associated_file( ATL03, (or, ATL10, ATL07, not tested) """ - import netrc - import lxml - import re - import posixpath - import os - import icesat2_toolkit.utilities AUXILIARY = False DIRECTORY = None @@ -307,9 +311,6 @@ def nsidc_icesat2_get_associated_file( def json_load(name, path, verbose=False): - import json - import os - full_name = os.path.join(path, name + ".json") with open(full_name, "r") as ifile: @@ -329,7 +330,6 @@ def ATL03_download(username, password, dpath, product_directory, sd, file_name): sd '2019.03.01'- subdirectory on ATLAS file_name 'ATL03_20190301010737_09560204_005_01.h5' - filename in subdirectory """ - import icesat2_toolkit.utilities HOST = ["https://n5eil01u.ecs.nsidc.org", "ATLAS", product_directory, sd, file_name] print("download to:", dpath + "/" + HOST[-1]) @@ -346,15 +346,9 @@ def ATL03_download(username, password, dpath, product_directory, sd, file_name): def save_pandas_table(table_dict, name, save_path): - import os - if not os.path.exists(save_path): os.makedirs(save_path) - import warnings - from pandas import HDFStore - from pandas.io.pytables import PerformanceWarning - warnings.filterwarnings("ignore", category=PerformanceWarning) with HDFStore(save_path + "/" + name + ".h5") as store: @@ -363,10 +357,6 @@ def save_pandas_table(table_dict, name, save_path): def load_pandas_table_dict(name, save_path): - import warnings - from pandas import HDFStore - from pandas.io.pytables import PerformanceWarning - warnings.filterwarnings("ignore", category=PerformanceWarning) return_dict = dict() @@ -378,8 +368,6 @@ def load_pandas_table_dict(name, save_path): def get_beam_hdf_store(ATL03_k): - import pandas as pd - DD = pd.DataFrame() # columns = ATL03.keys()) for ikey in ATL03_k.keys(): DD[ikey] = ATL03_k[ikey] @@ -388,17 +376,12 @@ def get_beam_hdf_store(ATL03_k): def get_beam_var_hdf_store(ATL03_k, ikey): - import pandas as pd - DD = pd.DataFrame() # columns = ATL03.keys()) DD[ikey] = ATL03_k[ikey] return DD def write_track_to_HDF5(data_dict, name, path, verbose=False, mode="w"): - import os - import h5py - mode = "w" if mode is None else mode if not os.path.exists(path): os.makedirs(path) @@ -420,8 +403,6 @@ def write_track_to_HDF5(data_dict, name, path, verbose=False, mode="w"): def get_time_for_track(delta_time, atlas_epoch): "returns pandas dataframe" - import pandas as pd - import icesat2_tracks.ICEsat2_SI_tools.convert_GPS_time as cGPS # Conversion of delta_time to a calendar date temp = cGPS.convert_GPS_time(atlas_epoch[0] + delta_time, OFFSET=0.0) @@ -446,8 +427,6 @@ def getATL03_beam(fileT, numpy=False, beam="gt1l", maxElev=1e6): beam key of the iceSAT2 beam. 
""" # Add in a proper description of the function here - import h5py - import pandas as pd # Open the file ATL03 = h5py.File(fileT, "r") @@ -556,9 +535,6 @@ def getATL03_height_correction(fileT, beam="gt1r"): """ # Add in a proper description of the function here - import h5py - import pandas as pd - # Open the file ATL03 = h5py.File(fileT, "r") @@ -585,9 +561,6 @@ def getATL07_beam(fileT, beam="gt1r", maxElev=1e6): """ # Add in a proper description of the function here - import h5py - import pandas as pd - # Open the file ATL07 = h5py.File(fileT, "r") @@ -669,9 +642,6 @@ def getATL10_beam(fileT, beam="gt1r", maxElev=1e6): """ # Add in a proper description of the function here - import h5py - import pandas as pd - # Open the file ATL07 = h5py.File(fileT, "r") @@ -756,9 +726,6 @@ def getATL07_height_corrections(fileT, beam="gt1r"): """ # Add in a proper description of the function here - import h5py - import pandas as pd - # Open the file ATL07 = h5py.File(fileT, "r") From 24c49962f1f99e015419a42af4f9ed60875bcf8e Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Tue, 30 Jan 2024 17:20:56 -0500 Subject: [PATCH 13/30] refactor: iotools.py for better readability and efficiency - Refactor getATL03_beam: Simplified the logic of the function to improve readability and performance - Update case_ID class: Add comments to clarify the purpose and functionality of the class. Also, simplified some of its logic for better performance and readability - Use pathlib where appropriate --- .../ICEsat2_SI_tools/iotools.py | 102 +++++++++--------- 1 file changed, 48 insertions(+), 54 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py index 468fb21f..e92ee495 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/iotools.py @@ -1,6 +1,7 @@ import os import re import json +from pathlib import Path import warnings from datetime import datetime from netrc import netrc @@ -108,7 +109,7 @@ def get_atl06p(ATL03_track_name, params_yapc, maximum_height): gdf = icesat2.atl06p(params_yapc, resources=[ATL03_track_name]) if gdf.empty: - raise Exception("Empty Geodataframe. No data could be retrieved.") + raise ValueError("Empty Geodataframe. No data could be retrieved.") print("Initial data retrieved") gdf = sct.correct_and_remove_height(gdf, maximum_height) @@ -121,28 +122,30 @@ class case_ID: def __init__(self, track_name): track_name_pattern = r"(\D{2}|\d{2})_?(\d{4})(\d{2})(\d{2})(\d{2})?(\d{2})?(\d{2})?_(\d{4})(\d{2})(\d{2})_?(\d{3})?_?(\d{2})?" 
+ # Compile the regular expression pattern for track names track_name_rx = re.compile(track_name_pattern) + + # Use the compiled regular expression to find all matches in the track name + # The pop() method is used to get the last (or only) match + # The result is a tuple, which is unpacked into several properties of the current object ( - self.hemis, - self.YY, - self.MM, - self.DD, - self.HH, - self.MN, - self.SS, - self.TRK, - self.CYC, - self.GRN, - self.RL, - self.VRS, + self.hemis, # Hemisphere + self.YY, # Year + self.MM, # Month + self.DD, # Day + self.HH, # Hour + self.MN, # Minute + self.SS, # Second + self.TRK, # Track + self.CYC, # Cycle + self.GRN, # Granule + self.RL, # Release + self.VRS, # Version ) = track_name_rx.findall(track_name).pop() - if self.hemis == "01": - self.hemis = "NH" - elif self.hemis == "02": - self.hemis = "SH" - else: - self.hemis = self.hemis + hemis_map = {"01": "NH", "02": "SH"} + self.hemis = hemis_map.get(self.hemis, self.hemis) + self.set() self.track_name_init = track_name @@ -346,12 +349,12 @@ def ATL03_download(username, password, dpath, product_directory, sd, file_name): def save_pandas_table(table_dict, name, save_path): - if not os.path.exists(save_path): - os.makedirs(save_path) + save_path = Path(save_path) + save_path.mkdir(parents=True, exist_ok=True) warnings.filterwarnings("ignore", category=PerformanceWarning) - with HDFStore(save_path + "/" + name + ".h5") as store: + with HDFStore(save_path / f"{name}.h5") as store: for name, table in table_dict.items(): store[name] = table @@ -382,23 +385,18 @@ def get_beam_var_hdf_store(ATL03_k, ikey): def write_track_to_HDF5(data_dict, name, path, verbose=False, mode="w"): - mode = "w" if mode is None else mode - if not os.path.exists(path): - os.makedirs(path) + path = Path(path) + path.mkdir(parents=True, exist_ok=True) - full_name = os.path.join(path, name + ".h5") - store = h5py.File(full_name, mode) - - for k in data_dict.keys(): - store1 = store.create_group(k) - for kk, I in list(data_dict[k].items()): - store1[kk] = I - # store1.close() - - store.close() + full_name = path / (name + ".h5") + with h5py.File(str(full_name), mode) as store: + for k in data_dict.keys(): + store1 = store.create_group(k) + for kk, I in list(data_dict[k].items()): + store1[kk] = I if verbose: - print("saved at: " + full_name) + print(f"saved at: {full_name}") def get_time_for_track(delta_time, atlas_epoch): @@ -452,7 +450,6 @@ def getATL03_beam(fileT, numpy=False, beam="gt1l", maxElev=1e6): # Photon height heights = ATL03[beam + "/heights/h_ph"][:] - # print(heights.shape) # Flag for signal confidence # column index: 0=Land; 1=Ocean; 2=SeaIce; 3=LandIce; 4=InlandWater @@ -464,25 +461,25 @@ def getATL03_beam(fileT, numpy=False, beam="gt1l", maxElev=1e6): # -- 3: medium # -- 4: high - mask_ocean = ( - ATL03[beam + "/heights/signal_conf_ph"][:, 1] > 2 - ) # ocean points medium or high quality - mask_seaice = ( - ATL03[beam + "/heights/signal_conf_ph"][:, 2] > 2 - ) # sea ice points medium or high quality + heighs_signal_conf_ph = "/heights/signal_conf_ph" + quality_threshold = 2 + beam_data = ATL03[beam + heighs_signal_conf_ph] + + # ocean points medium or high quality + mask_ocean = beam_data[:, 1] > quality_threshold + # sea ice points medium or high quality + mask_seaice = beam_data[:, 2] > quality_threshold mask_total = mask_seaice | mask_ocean - if sum(~mask_total) == (ATL03[beam + "/heights/signal_conf_ph"][:, 1]).size: + if sum(~mask_total) == beam_data[:, 1].size: print("zero photons, lower photon quality 
to 2 or higher") - mask_ocean = ( - ATL03[beam + "/heights/signal_conf_ph"][:, 1] > 1 - ) # ocean points medium or high quality - mask_seaice = ( - ATL03[beam + "/heights/signal_conf_ph"][:, 2] > 1 - ) # sea ice points medium or high quality + # lower quality threshold and recompute + quality_threshold = 1 + mask_ocean = beam_data[:, 1] > quality_threshold + mask_seaice = beam_data[:, 2] > quality_threshold mask_total = mask_seaice | mask_ocean - signal_confidence = ATL03[beam + "/heights/signal_conf_ph"][:, 1:3].max(1) + signal_confidence = ATL03[beam + heighs_signal_conf_ph][:, 1:3].max(1) ATL03.close() @@ -600,7 +597,6 @@ def getATL07_beam(fileT, beam="gt1r", maxElev=1e6): "height_segment_w_gaussian", # Width of Gaussian fit "height_segment_quality", # Height quality flag, 1 for good fit, 0 for bad ] - # vars = ['beam_fb_height', 'beam_fb_sigma' , 'beam_fb_confidence' , 'beam_fb_quality_flag'] D_heights = dict() for var in vars: @@ -621,7 +617,6 @@ def getATL07_beam(fileT, beam="gt1r", maxElev=1e6): D_env[var] = ATL07[beam + "/sea_ice_segments/" + I][:] dF_env = pd.DataFrame(D_env) - # Df = pd.concat({k: pd.DataFrame(v).T for k, v in data.items()}, axis=0) DF = pd.concat( {"time": dF_time, "ref": dF_bulk, "heights": dF_heights, "env": dF_env}, axis=1 ) @@ -646,7 +641,6 @@ def getATL10_beam(fileT, beam="gt1r", maxElev=1e6): ATL07 = h5py.File(fileT, "r") ### bulk positions and statistics - # f['gt1r/freeboard_beam_segment/beam_freeboard'].keys() vars_bulk = [ "seg_dist_x", From d53246b32976e6372faaa19180b52edcbb3568e7 Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Wed, 31 Jan 2024 13:45:08 -0500 Subject: [PATCH 14/30] fix: remove non-cli commands --- .github/workflows/test-B01_SL_load_single_file.yml | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/workflows/test-B01_SL_load_single_file.yml b/.github/workflows/test-B01_SL_load_single_file.yml index e56b6389..aefe5e90 100644 --- a/.github/workflows/test-B01_SL_load_single_file.yml +++ b/.github/workflows/test-B01_SL_load_single_file.yml @@ -25,11 +25,4 @@ jobs: run: pip list - name: first step B01_SL_load_single_file run: python src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py --track-name 20190502052058_05180312_005_01 --batch-key SH_testSLsinglefile2 --output-dir ./work - - name: second step make_spectra - run: python src/icesat2_tracks/analysis_db/B02_make_spectra_gFT.py SH_20190502_05180312 SH_testSLsinglefile2 True - - name: third step plot_spectra - run: python src/icesat2_tracks/analysis_db/B03_plot_spectra_ov.py SH_20190502_05180312 SH_testSLsinglefile2 True - - name: fourth step IOWAGA thredds - run: python src/icesat2_tracks/analysis_db/A02c_IOWAGA_thredds_prior.py SH_20190502_05180312 SH_testSLsinglefile2 True - - name: Fifth step B04_angle - run: python src/icesat2_tracks/analysis_db/B04_angle.py SH_20190502_05180312 SH_testSLsinglefile2 True + From 3e9729b45299fd3f1a132afbacd799f930b93a7f Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Thu, 1 Feb 2024 08:35:51 -0500 Subject: [PATCH 15/30] rollback changes of B06_correct_separate_var file --- .../analysis_db/B06_correct_separate_var.py | 775 ------------------ 1 file changed, 775 deletions(-) delete mode 100644 src/icesat2_tracks/analysis_db/B06_correct_separate_var.py diff --git a/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py b/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py deleted file mode 100644 index 2174b071..00000000 --- a/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py +++ /dev/null @@ 
-1,775 +0,0 @@ -import os, sys - - -""" -This file open a ICEsat2 track applied filters and corections and returns smoothed photon heights on a regular grid in an .nc file. -This is python 3 -""" -from icesat2_tracks.config.IceSAT2_startup import ( - mconfig, - xr, - color_schemes, - font_for_pres, - font_for_print, - plt, - np, - lstrings, - fig_sizes, -) - - -import h5py -import icesat2_tracks.ICEsat2_SI_tools.io as io -import icesat2_tracks.local_modules.m_tools_ph3 as MT -from icesat2_tracks.local_modules import m_general_ph3 as M -import time -import copy -import icesat2_tracks.ICEsat2_SI_tools.generalized_FT as gFT -from scipy.ndimage.measurements import label -import pandas as pd -from matplotlib.gridspec import GridSpec - -xr.set_options(display_style="text") -ID_name, batch_key, test_flag = io.init_from_input(sys.argv) -hemis, batch = batch_key.split("_") - -all_beams = mconfig["beams"]["all_beams"] -high_beams = mconfig["beams"]["high_beams"] -low_beams = mconfig["beams"]["low_beams"] - -load_path_work = mconfig["paths"]["work"] + "/" + batch_key + "/" -B3_hdf5 = h5py.File( - load_path_work + "B01_regrid" + "/" + ID_name + "_B01_binned.h5", "r" -) - - -load_path_angle = mconfig["paths"]["work"] + "/" + batch_key + "/B04_angle/" - -B3 = dict() -for b in all_beams: - B3[b] = io.get_beam_hdf_store(B3_hdf5[b]) - -B3_hdf5.close() - -load_file = load_path_work + "/B02_spectra/" + "B02_" + ID_name # + '.nc' -Gk = xr.open_dataset(load_file + "_gFT_k.nc") -Gx = xr.open_dataset(load_file + "_gFT_x.nc") -Gfft = xr.open_dataset(load_file + "_FFT.nc") - -plot_path = ( - mconfig["paths"]["plot"] - + "/" - + hemis - + "/" - + batch_key - + "/" - + ID_name - + "/B06_correction/" -) -MT.mkdirs_r(plot_path) - -save_path = mconfig["paths"]["work"] + batch_key + "/B06_corrected_separated/" -MT.mkdirs_r(save_path) - - -color_schemes.colormaps2(31, gamma=1) -col_dict = color_schemes.rels - - -def dict_weighted_mean(Gdict, weight_key): - """ - returns the weighted meean of a dict of xarray, data_arrays - weight_key must be in the xr.DataArrays - """ - - akey = list(Gdict.keys())[0] - GSUM = Gdict[akey].copy() - GSUM.data = np.zeros(GSUM.shape) - N_per_stancil = GSUM.N_per_stancil * 0 - N_photons = np.zeros(GSUM.N_per_stancil.size) - - counter = 0 - for k, I in Gdict.items(): - I = I.squeeze() - print(len(I.x)) - if len(I.x) != 0: - GSUM += I.where(~np.isnan(I), 0) * I[weight_key] - N_per_stancil += I[weight_key] - if "N_photons" in GSUM.coords: - N_photons += I["N_photons"] - counter += 1 - - GSUM = GSUM / N_per_stancil - - if "N_photons" in GSUM.coords: - GSUM.coords["N_photons"] = (("x", "beam"), np.expand_dims(N_photons, 1)) - - GSUM["beam"] = ["weighted_mean"] - GSUM.name = "power_spec" - - return GSUM - - -G_gFT_wmean = (Gk.where(~np.isnan(Gk["gFT_PSD_data"]), 0) * Gk["N_per_stancil"]).sum( - "beam" -) / Gk["N_per_stancil"].sum("beam") -G_gFT_wmean["N_photons"] = Gk["N_photons"].sum("beam") - -G_fft_wmean = (Gfft.where(~np.isnan(Gfft), 0) * Gfft["N_per_stancil"]).sum( - "beam" -) / Gfft["N_per_stancil"].sum("beam") -G_fft_wmean["N_per_stancil"] = Gfft["N_per_stancil"].sum("beam") - - -# plot -# derive spectral errors: -Lpoints = Gk.Lpoints.mean("beam").data -N_per_stancil = Gk.N_per_stancil.mean("beam").data # [0:-2] - -G_error_model = dict() -G_error_data = dict() - -for bb in Gk.beam.data: - I = Gk.sel(beam=bb) - b_bat_error = np.concatenate([I.model_error_k_cos.data, I.model_error_k_sin.data]) - Z_error = gFT.complex_represenation(b_bat_error, Gk.k.size, Lpoints) - PSD_error_data, 
PSD_error_model = gFT.Z_to_power_gFT( - Z_error, np.diff(Gk.k)[0], N_per_stancil, Lpoints - ) - - G_error_model[bb] = xr.DataArray( - data=PSD_error_model, - coords=I.drop("N_per_stancil").coords, - name="gFT_PSD_data_error", - ).expand_dims("beam") - G_error_data[bb] = xr.DataArray( - data=PSD_error_data, - coords=I.drop("N_per_stancil").coords, - name="gFT_PSD_data_error", - ).expand_dims("beam") - -gFT_PSD_data_error_mean = xr.concat(G_error_model.values(), dim="beam") -gFT_PSD_data_error_mean = xr.concat(G_error_data.values(), dim="beam") - -gFT_PSD_data_error_mean = ( - gFT_PSD_data_error_mean.where(~np.isnan(gFT_PSD_data_error_mean), 0) - * Gk["N_per_stancil"] -).sum("beam") / Gk["N_per_stancil"].sum("beam") -gFT_PSD_data_error_mean = ( - gFT_PSD_data_error_mean.where(~np.isnan(gFT_PSD_data_error_mean), 0) - * Gk["N_per_stancil"] -).sum("beam") / Gk["N_per_stancil"].sum("beam") - -G_gFT_wmean["gFT_PSD_data_err"] = gFT_PSD_data_error_mean -G_gFT_wmean["gFT_PSD_data_err"] = gFT_PSD_data_error_mean - -Gk["gFT_PSD_data_err"] = xr.concat(G_error_model.values(), dim="beam") -Gk["gFT_PSD_data_err"] = xr.concat(G_error_data.values(), dim="beam") - - -# - -G_gFT_smth = ( - G_gFT_wmean["gFT_PSD_data"].rolling(k=30, center=True, min_periods=1).mean() -) -G_gFT_smth["N_photons"] = G_gFT_wmean.N_photons -G_gFT_smth["N_per_stancil_fraction"] = Gk["N_per_stancil"].T.mean( - "beam" -) / Gk.Lpoints.mean("beam") - -k = G_gFT_smth.k - -F = M.figure_axis_xy() - -plt.loglog(k, G_gFT_smth / k) - -plt.title("displacement power Spectra", loc="left") - - -def define_noise_wavenumber_tresh_simple( - data_xr, k_peak, k_end_lim=None, plot_flag=False -): - """ - returns noise wavenumber on the high end of a spectral peak. This method fits a straight line in loglog speace using robust regression. - The noise level is defined as the wavenumber at which the residual error of a linear fit to the data is minimal. 
- - inputs: - data_xr xarray.Dataarray with the power spectra with k as dimension - k_peak wavenumber above which the searh should start - dk the intervall over which the regrssion is repeated - - returns: - k_end the wavenumber at which the spectrum flattens - m slope of the fitted line - b intersect of the fitted line - """ - from scipy.ndimage.measurements import label - - if k_end_lim is None: - k_end_lim = data_xr.k[-1] - - k_lead_peak_margin = k_peak * 1.05 - try: - data_log = ( - np.log(data_xr) - .isel(k=(data_xr.k > k_lead_peak_margin)) - .rolling(k=10, center=True, min_periods=1) - .mean() - ) - - except: - data_log = ( - np.log(data_xr) - .isel(k=(data_xr.k > k_lead_peak_margin / 2)) - .rolling(k=10, center=True, min_periods=1) - .mean() - ) - - k_log = np.log(data_log.k) - try: - d_grad = ( - data_log.differentiate("k").rolling(k=40, center=True, min_periods=4).mean() - ) - except: - d_grad = ( - data_log.differentiate("k").rolling(k=20, center=True, min_periods=2).mean() - ) - ll = label(d_grad >= -5) - - if ll[0][0] != 0: - print("no decay, set to peak") - return k_peak - - if sum(ll[0]) == 0: - k_end = d_grad.k[-1] - else: - k_end = d_grad.k[(ll[0] == 1)][0].data - - if plot_flag: - plt.plot(np.log(data_xr.k), np.log(data_xr)) - plt.plot(k_log, data_log) - plt.plot([np.log(k_end), np.log(k_end)], [-6, -5]) - return k_end - - -# new version -def get_correct_breakpoint(pw_results): - br_points = list() - for i in pw_results.keys(): - [br_points.append(i) if "breakpoint" in i else None] - br_points_df = pw_results[br_points] - br_points_sorted = br_points_df.sort_values() - - alphas_sorted = [ - i.replace("breakpoint", "alpha") for i in br_points_df.sort_values().index - ] - alphas_sorted.append("alpha" + str(len(alphas_sorted) + 1)) - - betas_sorted = [ - i.replace("breakpoint", "beta") for i in br_points_df.sort_values().index - ] - - # betas_sorted - alphas_v2 = list() - alpha_i = pw_results["alpha1"] - for i in [0] + list(pw_results[betas_sorted]): - alpha_i += i - alphas_v2.append(alpha_i) - - alphas_v2_sorted = pd.Series(index=alphas_sorted, data=alphas_v2) - br_points_sorted["breakpoint" + str(br_points_sorted.size + 1)] = "end" - - print("all alphas") - print(alphas_v2_sorted) - slope_mask = alphas_v2_sorted < 0 - - if sum(slope_mask) == 0: - print("no negative slope found, set to lowest") - breakpoint = "start" - else: - # take steepest slope - alpah_v2_sub = alphas_v2_sorted[slope_mask] - print(alpah_v2_sub) - print(alpah_v2_sub.argmin()) - break_point_name = alpah_v2_sub.index[alpah_v2_sub.argmin()].replace( - "alpha", "breakpoint" - ) - - # take first slope - breakpoint = br_points_sorted[break_point_name] - - return breakpoint - - -def get_breakingpoints(xx, dd): - import piecewise_regression - - x2, y2 = xx, dd - convergence_flag = True - n_breakpoints = 3 - while convergence_flag: - pw_fit = piecewise_regression.Fit(x2, y2, n_breakpoints=n_breakpoints) - print("n_breakpoints", n_breakpoints, pw_fit.get_results()["converged"]) - convergence_flag = not pw_fit.get_results()["converged"] - n_breakpoints += 1 - if n_breakpoints >= 4: - convergence_flag = False - - pw_results = pw_fit.get_results() - - if pw_results["converged"]: - pw_results_df = pd.DataFrame(pw_results["estimates"]).loc["estimate"] - - breakpoint = get_correct_breakpoint(pw_results_df) - - return pw_fit, breakpoint - - else: - return pw_fit, False - - -def define_noise_wavenumber_piecewise(data_xr, plot_flag=False): - data_log = data_xr - data_log = np.log(data_xr) - - k = data_log.k.data - k_log = 
np.log(k) - - pw_fit, breakpoint_log = get_breakingpoints(k_log, data_log.data) - - if breakpoint_log is "start": - print("no decay, set to lowerst wavenumber") - breakpoint_log = k_log[0] - if (breakpoint_log is "end") | (breakpoint_log is False): - print("higest wavenumner") - breakpoint_log = k_log[-1] - - breakpoint_pos = abs(k_log - breakpoint_log).argmin() - breakpoint_k = k[breakpoint_pos] - - if plot_flag: - pw_fit.plot() - plt.plot(k_log, data_log) - - return breakpoint_k, pw_fit - - -k_lim_list = list() -k_end_previous = np.nan -x = G_gFT_smth.x.data[0] -k = G_gFT_smth.k.data - -for x in G_gFT_smth.x.data: - print(x) - # use displacement power spectrum - k_end, pw_fit = define_noise_wavenumber_piecewise( - G_gFT_smth.sel(x=x) / k, plot_flag=False - ) - - k_save = k_end_previous if k_end == k[0] else k_end - k_end_previous = k_save - k_lim_list.append(k_save) - print("--------------------------") - -font_for_pres() -G_gFT_smth.coords["k_lim"] = ("x", k_lim_list) -G_gFT_smth.k_lim.plot() -k_lim_smth = G_gFT_smth.k_lim.rolling(x=3, center=True, min_periods=1).mean() -k_lim_smth.plot(c="r") - -plt.title("k_c filter", loc="left") -F.save_light(path=plot_path, name=str(ID_name) + "_B06_atten_ov") - -G_gFT_smth["k_lim"] = k_lim_smth -G_gFT_wmean.coords["k_lim"] = k_lim_smth - -font_for_print() - -fn = copy.copy(lstrings) -F = M.figure_axis_xy( - fig_sizes["two_column"][0], - fig_sizes["two_column"][0] * 0.9, - container=True, - view_scale=1, -) - - -plt.suptitle( - "Cut-off Frequency for Displacement Spectral\n" + io.ID_to_str(ID_name), y=0.97 -) -gs = GridSpec(8, 3, wspace=0.1, hspace=1.5) - -k_lims = G_gFT_wmean.k_lim -xlims = G_gFT_wmean.k[0], G_gFT_wmean.k[-1] -# -k = high_beams[0] -for pos, k, pflag in zip( - [gs[0:2, 0], gs[0:2, 1], gs[0:2, 2]], high_beams, [True, False, False] -): - ax0 = F.fig.add_subplot(pos) - Gplot = ( - Gk.sel(beam=k) - .isel(x=slice(0, -1)) - .gFT_PSD_data.squeeze() - .rolling(k=20, x=2, min_periods=1, center=True) - .mean() - ) - Gplot = Gplot.where(Gplot["N_per_stancil"] / Gplot["Lpoints"] >= 0.1) - alpha_range = iter(np.linspace(1, 0, Gplot.x.data.size)) - for x in Gplot.x.data: - ialpha = next(alpha_range) - plt.loglog( - Gplot.k, - Gplot.sel(x=x) / Gplot.k, - linewidth=0.5, - color=color_schemes.rels[k], - alpha=ialpha, - ) - ax0.axvline( - k_lims.sel(x=x), linewidth=0.4, color="black", zorder=0, alpha=ialpha - ) - - plt.title(next(fn) + k, color=col_dict[k], loc="left") - plt.xlim(xlims) - # - if pflag: - ax0.tick_params(labelbottom=False, bottom=True) - plt.ylabel("Power (m$^2$/k')") - plt.legend() - else: - ax0.tick_params(labelbottom=False, bottom=True, labelleft=False) - -for pos, k, pflag in zip( - [gs[2:4, 0], gs[2:4, 1], gs[2:4, 2]], low_beams, [True, False, False] -): - ax0 = F.fig.add_subplot(pos) - Gplot = ( - Gk.sel(beam=k) - .isel(x=slice(0, -1)) - .gFT_PSD_data.squeeze() - .rolling(k=20, x=2, min_periods=1, center=True) - .mean() - ) - - Gplot = Gplot.where(Gplot["N_per_stancil"] / Gplot["Lpoints"] >= 0.1) - - alpha_range = iter(np.linspace(1, 0, Gplot.x.data.size)) - for x in Gplot.x.data: - ialpha = next(alpha_range) - plt.loglog( - Gplot.k, - Gplot.sel(x=x) / Gplot.k, - linewidth=0.5, - color=color_schemes.rels[k], - alpha=ialpha, - ) - ax0.axvline( - k_lims.sel(x=x), linewidth=0.4, color="black", zorder=0, alpha=ialpha - ) - - plt.title(next(fn) + k, color=col_dict[k], loc="left") - plt.xlim(xlims) - plt.xlabel("observed wavenumber k' ") - - if pflag: - ax0.tick_params(bottom=True) - plt.ylabel("Power (m$^2$/k')") - plt.legend() - 
else: - ax0.tick_params(bottom=True, labelleft=False) - -F.save_light(path=plot_path, name=str(ID_name) + "_B06_atten_ov_simple") -F.save_pup(path=plot_path, name=str(ID_name) + "_B06_atten_ov_simple") - -pos = gs[5:, 0:2] -ax0 = F.fig.add_subplot(pos) - -lat_str = ( - str(np.round(Gx.isel(x=0).lat.mean().data, 2)) - + " to " - + str(np.round(Gx.isel(x=-1).lat.mean().data, 2)) -) -plt.title(next(fn) + "Mean Displacement Spectra\n(lat=" + lat_str + ")", loc="left") - -dd = 10 * np.log((G_gFT_smth / G_gFT_smth.k).isel(x=slice(0, -1))) -dd = dd.where(~np.isinf(dd), np.nan) - -## filter out segments with less then 10% of data points -dd = dd.where(G_gFT_smth["N_per_stancil_fraction"] >= 0.1) - -dd_lims = np.round(dd.quantile(0.01).data * 0.95, 0), np.round( - dd.quantile(0.95).data * 1.05, 0 -) -plt.pcolor( - dd.x / 1e3, - dd.k, - dd, - vmin=dd_lims[0], - vmax=dd_lims[-1], - cmap=color_schemes.white_base_blgror, -) -cb = plt.colorbar(orientation="vertical") - -cb.set_label("Power (m$^2$/k)") -plt.plot( - G_gFT_smth.isel(x=slice(0, -1)).x / 1e3, - G_gFT_smth.isel(x=slice(0, -1)).k_lim, - color=color_schemes.black, - linewidth=1, -) -plt.ylabel("wavenumber k") -plt.xlabel("X (km)") - -pos = gs[6:, -1] -ax9 = F.fig.add_subplot(pos) - -plt.title("Data Coverage (%)", loc="left") -plt.plot( - G_gFT_smth.x / 1e3, - G_gFT_smth["N_per_stancil_fraction"] * 100, - linewidth=0.8, - color="black", -) -ax9.spines["left"].set_visible(False) -ax9.spines["right"].set_visible(True) -ax9.tick_params(labelright=True, right=True, labelleft=False, left=False) -ax9.axhline(10, linewidth=0.8, linestyle="--", color="black") -plt.xlabel("X (km)") - - -F.save_light(path=plot_path, name=str(ID_name) + "_B06_atten_ov") -F.save_pup(path=plot_path, name=str(ID_name) + "_B06_atten_ov") - - -# reconstruct slope displacement data -def fit_offset(x, data, model, nan_mask, deg): - p_offset = np.polyfit(x[~nan_mask], data[~nan_mask] - model[~nan_mask], deg) - p_offset[-1] = 0 - poly_offset = np.polyval(p_offset, x) - return poly_offset - - -def tanh_fitler(x, x_cutoff, sigma_g=0.01): - """ - zdgfsg - """ - - decay = 0.5 - np.tanh((x - x_cutoff) / sigma_g) / 2 - return decay - - -def reconstruct_displacement(Gx_1, Gk_1, T3, k_thresh): - """ - reconstructs photon displacement heights for each stancil given the model parameters in Gk_1 - A low-pass frequeny filter can be applied using k-thresh - - inputs: - Gk_1 model data per stencil from _gFT_k file with sin and cos coefficients - Gx_1 real data per stencil from _gFT_x file with mean photon heights and coordindate systems - T3 - k_thresh (None) threshold for low-pass filter - - returns: - height_model reconstucted displements heights of the stancil - poly_offset fitted staight line to the residual between observations and model to account for low-pass variability - nan_mask mask where is observed data in - """ - - dist_stencil = Gx_1.eta + Gx_1.x - dist_stencil_lims = dist_stencil[0].data, dist_stencil[-1].data - - gFT_cos_coeff_sel = np.copy(Gk_1.gFT_cos_coeff) - gFT_sin_coeff_sel = np.copy(Gk_1.gFT_sin_coeff) - - gFT_cos_coeff_sel = gFT_cos_coeff_sel * tanh_fitler(Gk_1.k, k_thresh, sigma_g=0.003) - gFT_sin_coeff_sel = gFT_sin_coeff_sel * tanh_fitler(Gk_1.k, k_thresh, sigma_g=0.003) - - FT_int = gFT.generalized_Fourier(Gx_1.eta + Gx_1.x, None, Gk_1.k) - _ = FT_int.get_H() - FT_int.p_hat = np.concatenate( - [-gFT_sin_coeff_sel / Gk_1.k, gFT_cos_coeff_sel / Gk_1.k] - ) - - dx = Gx.eta.diff("eta").mean().data - height_model = FT_int.model() / dx - dist_nanmask = 
np.isnan(Gx_1.y_data) - height_data = np.interp( - dist_stencil, T3_sel["dist"], T3_sel["heights_c_weighted_mean"] - ) - return height_model, np.nan, dist_nanmask - - -# cutting Table data -G_height_model = dict() -k = "gt2l" -for bb in Gx.beam.data: - G_height_model_temp = dict() - for i in np.arange(Gx.x.size): - Gx_1 = Gx.isel(x=i).sel(beam=bb) - Gk_1 = Gk.isel(x=i).sel(beam=bb) - k_thresh = G_gFT_smth.k_lim.isel(x=0).data - - dist_stencil = Gx_1.eta + Gx_1.x - dist_stencil_lims = dist_stencil[0].data, dist_stencil[-1].data - dist_stencil_lims_plot = dist_stencil_lims - dist_stencil_lims_plot = Gx_1.eta[0] * 1 + Gx_1.x, Gx_1.eta[-1] * 1 + Gx_1.x - - T3_sel = B3[k].loc[ - ( - (B3[k]["dist"] >= dist_stencil_lims[0]) - & (B3[k]["dist"] <= dist_stencil_lims[1]) - ) - ] - - if T3_sel.shape[0] != 0: - height_model, poly_offset, dist_nanmask = reconstruct_displacement( - Gx_1, Gk_1, T3_sel, k_thresh=k_thresh - ) - poly_offset = poly_offset * 0 - G_height_model_temp[str(i) + bb] = xr.DataArray( - height_model, coords=Gx_1.coords, dims=Gx_1.dims, name="height_model" - ) - else: - G_height_model_temp[str(i) + bb] = xr.DataArray( - Gx_1.y_model.data, - coords=Gx_1.coords, - dims=Gx_1.dims, - name="height_model", - ) - - G_height_model[bb] = xr.concat(G_height_model_temp.values(), dim="x").T - -Gx["height_model"] = xr.concat(G_height_model.values(), dim="beam").transpose( - "eta", "beam", "x" -) - -Gx_v2, B2_v2, B3_v2 = dict(), dict(), dict() -for bb in Gx.beam.data: - print(bb) - Gx_k = Gx.sel(beam=bb) - Gh = Gx["height_model"].sel(beam=bb).T - Gh_err = Gx_k["model_error_x"].T - Gnans = np.isnan(Gx_k.y_model) - - concented_heights = Gh.data.reshape(Gh.data.size) - concented_err = Gh_err.data.reshape(Gh.data.size) - concented_nans = Gnans.data.reshape(Gnans.data.size) - concented_x = (Gh.x + Gh.eta).data.reshape(Gh.data.size) - - dx = Gh.eta.diff("eta")[0].data - continous_x_grid = np.arange(concented_x.min(), concented_x.max(), dx) - continous_height_model = np.interp(continous_x_grid, concented_x, concented_heights) - concented_err = np.interp(continous_x_grid, concented_x, concented_err) - continous_nans = np.interp(continous_x_grid, concented_x, concented_nans) == 1 - - T3 = B3[bb] - T3 = T3.sort_values("x") - T3 = T3.sort_values("dist") - - T3["heights_c_model"] = np.interp( - T3["dist"], continous_x_grid, continous_height_model - ) - T3["heights_c_model_err"] = np.interp(T3["dist"], continous_x_grid, concented_err) - T3["heights_c_residual"] = T3["heights_c_weighted_mean"] - T3["heights_c_model"] - - B3_v2[bb] = T3 - Gx_v2[bb] = Gx_k - -try: - G_angle = xr.open_dataset(load_path_angle + "/B05_" + ID_name + "_angle_pdf.nc") - - font_for_pres() - - Ga_abs = ( - G_angle.weighted_angle_PDF_smth.isel(angle=G_angle.angle > 0).data - + G_angle.weighted_angle_PDF_smth.isel(angle=G_angle.angle < 0).data[:, ::-1] - ) / 2 - Ga_abs = xr.DataArray( - data=Ga_abs.T, - dims=G_angle.dims, - coords=G_angle.isel(angle=G_angle.angle > 0).coords, - ) - - Ga_abs_front = Ga_abs.isel(x=slice(0, 3)) - Ga_best = (Ga_abs_front * Ga_abs_front.N_data).sum("x") / Ga_abs_front.N_data.sum( - "x" - ) - - theta = Ga_best.angle[Ga_best.argmax()].data - theta_flag = True - - font_for_print() - F = M.figure_axis_xy(3, 5, view_scale=0.7) - - plt.subplot(2, 1, 1) - plt.pcolor(Ga_abs) - plt.xlabel("abs angle") - plt.ylabel("x") - - ax = plt.subplot(2, 1, 2) - Ga_best.plot() - plt.title("angle front " + str(theta * 180 / np.pi), loc="left") - ax.axvline(theta, color="red") - F.save_light(path=plot_path, name="B06_angle_def") 
-except: - print("no angle data found, skip angle corretion") - theta = 0 - theta_flag = False - -# %% -lam_p = 2 * np.pi / Gk.k -lam = lam_p * np.cos(theta) - -if theta_flag: - k_corrected = 2 * np.pi / lam - x_corrected = Gk.x * np.cos(theta) -else: - k_corrected = 2 * np.pi / lam * np.nan - x_corrected = Gk.x * np.cos(theta) * np.nan - -# spectral save -G5 = G_gFT_wmean.expand_dims(dim="beam", axis=1) -G5.coords["beam"] = ["weighted_mean"] -G5 = G5.assign_coords(N_photons=G5.N_photons) -G5["N_photons"] = G5["N_photons"].expand_dims("beam") -G5["N_per_stancil_fraction"] = G5["N_per_stancil_fraction"].expand_dims("beam") - -Gk_v2 = xr.merge([Gk, G5]) - -Gk_v2 = Gk_v2.assign_coords(x_corrected=("x", x_corrected.data)).assign_coords( - k_corrected=("k", k_corrected.data) -) - -Gk_v2.attrs["best_guess_incident_angle"] = theta - -# save collected spectral data -Gk_v2.to_netcdf(save_path + "/B06_" + ID_name + "_gFT_k_corrected.nc") -Gx -# save real space data -Gx.to_netcdf(save_path + "/B06_" + ID_name + "_gFT_x_corrected.nc") -try: - io.save_pandas_table( - B2_v2, "B06_" + ID_name + "_B06_corrected_resid", save_path - ) # all photos but heights adjusted and with distance coordinate -except: - os.remove(save_path + "B06_" + ID_name + "_B06_corrected_resid.h5") - io.save_pandas_table( - B2_v2, "B06_" + ID_name + "_B06_corrected_resid", save_path - ) # all photos but heights adjusted and with distance coordinate - -try: - io.save_pandas_table( - B3_v2, "B06_" + ID_name + "_binned_resid", save_path - ) # regridding heights -except: - os.remove(save_path + "B06_" + ID_name + "_binned_resid.h5") - io.save_pandas_table( - B3_v2, "B06_" + ID_name + "_binned_resid", save_path - ) # regridding heights - -MT.json_save( - "B06_success", - plot_path + "../", - {"time": time.asctime(time.localtime(time.time()))}, -) -print("done. saved target at " + plot_path + "../B06_success") From a8a183cc4cbf394c533f60ce100a6a2a7cbac121 Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Thu, 1 Feb 2024 08:37:03 -0500 Subject: [PATCH 16/30] tracking previous version of the file --- analysis_db/B06_correct_separate_var.py | 852 ++++++++++++++++++++++++ 1 file changed, 852 insertions(+) create mode 100644 analysis_db/B06_correct_separate_var.py diff --git a/analysis_db/B06_correct_separate_var.py b/analysis_db/B06_correct_separate_var.py new file mode 100644 index 00000000..e0a6d53f --- /dev/null +++ b/analysis_db/B06_correct_separate_var.py @@ -0,0 +1,852 @@ +# %% +import os, sys +#execfile(os.environ['PYTHONSTARTUP']) + +""" +This file open a ICEsat2 track applied filters and corections and returns smoothed photon heights on a regular grid in an .nc file. 
+This is python 3 +""" + +exec(open(os.environ['PYTHONSTARTUP']).read()) +exec(open(STARTUP_2021_IceSAT2).read()) + +#%matplotlib inline + +import ICEsat2_SI_tools.convert_GPS_time as cGPS +import h5py +import ICEsat2_SI_tools.io as io +import ICEsat2_SI_tools.spectral_estimates as spec +import ICEsat2_SI_tools.lanczos as lanczos +import time +import imp +import copy +import spicke_remover +import datetime +import generalized_FT as gFT +from scipy.ndimage.measurements import label + +xr.set_options(display_style='text') +#import s3fs +# %% +ID_name, batch_key, test_flag = io.init_from_input(sys.argv) # loads standard experiment +#ID_name, batch_key, test_flag = '20190605061807_10380310_004_01', 'SH_batch01', False +#ID_name, batch_key, test_flag = '20190601094826_09790312_004_01', 'SH_batch01', False +#ID_name, batch_key, test_flag = '20190207111114_06260210_004_01', 'SH_batch02', False +#ID_name, batch_key, test_flag = '20190208152826_06440210_004_01', 'SH_batch01', False +#ID_name, batch_key, test_flag = '20190213133330_07190212_004_01', 'SH_batch02', False +#ID_name, batch_key, test_flag = '20190207002436_06190212_004_01', 'SH_batch02', False +#ID_name, batch_key, test_flag = '20190206022433_06050212_004_01', 'SH_batch02', False + +#ID_name, batch_key, test_flag = '20190219073735_08070210_004_01', 'SH_batch02', False +#ID_name, batch_key, test_flag = '20190502021224_05160312_004_01', 'SH_batch02', False + +#ID_name, batch_key, test_flag = 'SH_20190208_06440212', 'SH_publish', True +#ID_name, batch_key, test_flag = 'SH_20190219_08070210', 'SH_publish', True +#ID_name, batch_key, test_flag = 'SH_20190502_05160312', 'SH_publish', True + +#ID_name, batch_key, test_flag = 'NH_20190311_11200203', 'NH_batch06', True +#ID_name, batch_key, test_flag = 'NH_20210312_11961005', 'NH_batch07', True + +#ID_name, batch_key , test_flag = 'SH_20190502_05180312', 'SH_testSLsinglefile2' , True + +#print(ID_name, batch_key, test_flag) +hemis, batch = batch_key.split('_') + +all_beams = mconfig['beams']['all_beams'] +high_beams = mconfig['beams']['high_beams'] +low_beams = mconfig['beams']['low_beams'] + +load_path_work = mconfig['paths']['work'] +'/'+ batch_key +'/' +B3_hdf5 = h5py.File(load_path_work +'B01_regrid'+'/'+ID_name + '_B01_binned.h5', 'r') + + +load_path_angle = mconfig['paths']['work'] +'/'+ batch_key +'/B04_angle/' + +B3 = dict() +for b in all_beams: + B3[b] = io.get_beam_hdf_store(B3_hdf5[b]) + +B3_hdf5.close() + +# B2 = io.load_pandas_table_dict(ID_name + '_B01_regridded' , load_path1) # rhis is the rar photon data +# B3 = io.load_pandas_table_dict(ID_name + '_B01_binned' , load_path1) # + +load_file = load_path_work +'/B02_spectra/' + 'B02_' + ID_name #+ '.nc' +Gk = xr.open_dataset(load_file+'_gFT_k.nc') +Gx = xr.open_dataset(load_file+'_gFT_x.nc') +Gfft = xr.open_dataset(load_file+'_FFT.nc') + + +#plot_path = mconfig['paths']['plot'] + '/'+hemis+'/'+batch_key+'/' + ID_name + '/' +plot_path = mconfig['paths']['plot'] + '/'+hemis+'/'+batch_key+'/' + ID_name + '/B06_correction/' +MT.mkdirs_r(plot_path) + +save_path = mconfig['paths']['work'] +batch_key+'/B06_corrected_separated/' +MT.mkdirs_r(save_path) + + +# %% + +#Gfilt = io.load_pandas_table_dict(ID_name + '_B01_regridded', load_path) # rhis is the rar photon data +#Gd = io.load_pandas_table_dict(ID_name + '_B01_binned' , load_path) # + +col.colormaps2(31, gamma=1) +col_dict= col.rels + + +# %% +def dict_weighted_mean(Gdict, weight_key): + """ + returns the weighted meean of a dict of xarray, data_arrays + weight_key must be in the 
xr.DataArrays + """ + #Gdict = G_rar_fft + #weight_key='N_per_stancil' + + akey = list( Gdict.keys() )[0] + GSUM = Gdict[akey].copy() + GSUM.data = np.zeros(GSUM.shape) + N_per_stancil = GSUM.N_per_stancil * 0 + N_photons = np.zeros(GSUM.N_per_stancil.size) + + counter= 0 + for k,I in Gdict.items(): + #print(k) + I =I.squeeze() + print(len(I.x) ) + if len(I.x) !=0: + GSUM += I.where( ~np.isnan(I), 0) * I[weight_key] #.sel(x=GSUM.x) + N_per_stancil += I[weight_key] + if 'N_photons' in GSUM.coords: + N_photons += I['N_photons'] + counter+=1 + + GSUM = GSUM / N_per_stancil + + if 'N_photons' in GSUM.coords: + GSUM.coords['N_photons'] = (('x', 'beam'), np.expand_dims(N_photons, 1) ) + + GSUM['beam'] = ['weighted_mean'] + GSUM.name='power_spec' + + return GSUM + + +#G_gFT_wmean = (Gk['gFT_PSD_data'].where( ~np.isnan(Gk['gFT_PSD_data']), 0) * Gk['N_per_stancil']).sum('beam')/ Gk['N_per_stancil'].sum('beam') + +G_gFT_wmean = (Gk.where( ~np.isnan(Gk['gFT_PSD_data']), 0) * Gk['N_per_stancil']).sum('beam')/ Gk['N_per_stancil'].sum('beam') +G_gFT_wmean['N_photons'] = Gk['N_photons'].sum('beam') + +G_fft_wmean = (Gfft.where( ~np.isnan(Gfft), 0) * Gfft['N_per_stancil']).sum('beam')/ Gfft['N_per_stancil'].sum('beam') +G_fft_wmean['N_per_stancil'] = Gfft['N_per_stancil'].sum('beam') + + +# %% plot + +# derive spectral errors: +Lpoints= Gk.Lpoints.mean('beam').data +N_per_stancil = Gk.N_per_stancil.mean('beam').data#[0:-2] + +G_error_model =dict() +G_error_data =dict() + +for bb in Gk.beam.data: + I = Gk.sel(beam= bb) + b_bat_error = np.concatenate([ I.model_error_k_cos.data , I.model_error_k_sin.data ]) + Z_error = gFT.complex_represenation(b_bat_error, Gk.k.size, Lpoints) + PSD_error_data, PSD_error_model = gFT.Z_to_power_gFT(Z_error, np.diff(Gk.k)[0],N_per_stancil , Lpoints ) + + #np.expand_dims(PSD_error_model, axis =) + G_error_model[bb] = xr.DataArray(data = PSD_error_model, coords = I.drop('N_per_stancil').coords, name='gFT_PSD_data_error' ).expand_dims('beam') + G_error_data[bb] = xr.DataArray(data = PSD_error_data, coords = I.drop('N_per_stancil').coords, name='gFT_PSD_data_error' ).expand_dims('beam') + +gFT_PSD_data_error_mean = xr.concat(G_error_model.values(), dim='beam') +gFT_PSD_data_error_mean = xr.concat(G_error_data.values(), dim='beam') + +gFT_PSD_data_error_mean = ( gFT_PSD_data_error_mean.where( ~np.isnan(gFT_PSD_data_error_mean), 0) * Gk['N_per_stancil']).sum('beam')/Gk['N_per_stancil'].sum('beam') +gFT_PSD_data_error_mean = ( gFT_PSD_data_error_mean.where( ~np.isnan(gFT_PSD_data_error_mean), 0) * Gk['N_per_stancil']).sum('beam')/Gk['N_per_stancil'].sum('beam') + +G_gFT_wmean['gFT_PSD_data_err'] = gFT_PSD_data_error_mean +G_gFT_wmean['gFT_PSD_data_err'] = gFT_PSD_data_error_mean + +Gk['gFT_PSD_data_err'] = xr.concat(G_error_model.values(), dim='beam') +Gk['gFT_PSD_data_err'] = xr.concat(G_error_data.values(), dim='beam') + + +# %% + +G_gFT_smth = G_gFT_wmean['gFT_PSD_data'].rolling(k=30, center=True, min_periods=1).mean() +G_gFT_smth['N_photons'] = G_gFT_wmean.N_photons +G_gFT_smth["N_per_stancil_fraction"] = Gk['N_per_stancil'].T.mean('beam')/Gk.Lpoints.mean('beam') + +k = G_gFT_smth.k + +# %% +# GG_no_nan = G_gFT_smth.isel( x = ~np.isnan(G_gFT_smth.mean('k')) ) +# k_lead_peak = GG_no_nan.k[GG_no_nan.isel(x=0).argmax().data].data +# if k_lead_peak== k[0].data or k_lead_peak == k[-1].data: +# #raise ValueError('wavenumber Peak on Boundary!') +# print('wavenumber Peak on Boundary!') +# MT.json_save('B06_fail', plot_path+'../', {'time':time.asctime( time.localtime(time.time()) ) , 
'reason': 'wavenumber Peak on Boundary!'}) +# print('exit()') +# #exit() +# +# # %% +# k_lims =0.01 +# k_span = [k_lead_peak- k_lims , k_lead_peak, k_lead_peak+ k_lims] + +F = M.figure_axis_xy() +#plt.loglog(k, k**(-2)) +# plt.loglog(k, 1e-4 *k**(-2)) +# plt.loglog(k, 1e-5 *k**(-3)) + +# F.ax.axvline(k_span[0]) +# F.ax.axvline(k_span[1]) +# F.ax.axvline(k_span[2]) +#plt.plot(np.log(k), np.log( k**(-3) ) ) +#plt.loglog(k, (k)**(-3) - 1e5) + +plt.loglog(k, G_gFT_smth/k) +# dd= dd.where(~np.isinf(dd), np.nan ) +#plt.grid() +plt.title('displacement power Spectra', loc='left') + +# %% +def define_noise_wavenumber_tresh_simple(data_xr, k_peak, k_end_lim =None, plot_flag = False): + + """ + returns noise wavenumber on the high end of a spectral peak. This method fits a straight line in loglog speace using robust regression. + The noise level is defined as the wavenumber at which the residual error of a linear fit to the data is minimal. + + inputs: + data_xr xarray.Dataarray with the power spectra with k as dimension + k_peak wavenumber above which the searh should start + dk the intervall over which the regrssion is repeated + + returns: + k_end the wavenumber at which the spectrum flattens + m slope of the fitted line + b intersect of the fitted line + """ + #data_xr, k_peak = G_gFT_smth.isel(x=0), k_lead_peak + #k_end_lim = None# + #k_end_lim= 0.06396283#0.0224938*1.05 + from scipy.ndimage.measurements import label + + if k_end_lim is None: + k_end_lim =data_xr.k[-1] + + k_lead_peak_margin = k_peak *1.05 + try: + data_log = np.log(data_xr).isel(k =(data_xr.k > k_lead_peak_margin)).rolling(k =10, center=True, min_periods=1).mean() + + except: + data_log = np.log(data_xr).isel(k =(data_xr.k > k_lead_peak_margin/2)).rolling(k =10, center=True, min_periods=1).mean() + + k_log= np.log(data_log.k) + try: + d_grad = data_log.differentiate('k').rolling(k =40, center=True, min_periods=4).mean() + except: + d_grad = data_log.differentiate('k').rolling(k =20, center=True, min_periods=2).mean() + ll = label( d_grad >=-5 ) + + #test if plausible minium exist: + # #print(ll[0][d_grad.k <= k_end_lim] ) + # if sum( ll[0][d_grad.k <= k_end_lim] ==0) == 0: + # #print(sum( ll[0][d_grad.k <= k_end_lim] ==0) == 0) + # print('no gradient in range, set to peak') + # return k_peak + + if ll[0][0] !=0: + #print(sum( ll[0][d_grad.k <= k_end_lim] ==0) == 0) + print('no decay, set to peak') + return k_peak + + if sum(ll[0]) == 0: + k_end = d_grad.k[-1] + else: + k_end = d_grad.k[(ll[0] == 1) ][0].data + + if plot_flag: + # plt.plot(np.log(d_grad.k), d_grad) + # plt.show() + plt.plot(np.log(data_xr.k), np.log(data_xr)) + plt.plot(k_log, data_log ) + plt.plot([np.log(k_end), np.log(k_end)], [-6, -5]) + #print(k_end) + return k_end + + + +# %% new version +def get_correct_breakpoint(pw_results): + br_points = list() + for i in pw_results.keys(): + [br_points.append(i) if 'breakpoint' in i else None] + br_points_df = pw_results[br_points] + br_points_sorted = br_points_df.sort_values() + + alphas_sorted = [i.replace('breakpoint', 'alpha') for i in br_points_df.sort_values().index] + alphas_sorted.append('alpha'+ str(len(alphas_sorted)+1) ) + + + betas_sorted = [i.replace('breakpoint', 'beta') for i in br_points_df.sort_values().index] + + #betas_sorted + alphas_v2 = list() + alpha_i = pw_results['alpha1'] + for i in [0] + list(pw_results[betas_sorted]): + alpha_i += i + alphas_v2.append(alpha_i) + + alphas_v2_sorted = pd.Series(index = alphas_sorted, data =alphas_v2) + br_points_sorted['breakpoint'+ 
str(br_points_sorted.size+1)] = 'end'
+
+    print('all alphas')
+    print(alphas_v2_sorted)
+    slope_mask = alphas_v2_sorted < 0
+
+    if sum(slope_mask) == 0:
+        print('no negative slope found, set to lowest')
+        breakpoint = 'start'
+    else:
+
+        # take steepest slope
+        alpha_v2_sub = alphas_v2_sorted[slope_mask]
+        print(alpha_v2_sub)
+        print(alpha_v2_sub.argmin())
+        break_point_name = alpha_v2_sub.index[alpha_v2_sub.argmin()].replace('alpha', 'breakpoint')
+
+        # take first slope
+        #break_point_name = alphas_v2_sorted[slope_mask].index[0].replace('alpha', 'breakpoint')
+        breakpoint = br_points_sorted[break_point_name]
+
+    return breakpoint
+
+def get_breakingpoints(xx, dd):
+
+    import piecewise_regression
+    x2, y2 = xx, dd
+    convergence_flag = True
+    n_breakpoints = 3
+    while convergence_flag:
+        pw_fit = piecewise_regression.Fit(x2, y2, n_breakpoints=n_breakpoints)
+        print('n_breakpoints', n_breakpoints, pw_fit.get_results()['converged'])
+        convergence_flag = not pw_fit.get_results()['converged']
+        n_breakpoints += 1
+        if n_breakpoints >= 4:
+            convergence_flag = False
+
+    pw_results = pw_fit.get_results()
+    #pw_fit.summary()
+
+    if pw_results['converged']:
+        # if pw_results['estimates']['alpha1']['estimate'] < 0:
+        #     print('decay at the front')
+        #     print('n_breakpoints',pw_fit.n_breakpoints )
+
+        pw_results_df = pd.DataFrame(pw_results['estimates']).loc['estimate']
+
+        breakpoint = get_correct_breakpoint(pw_results_df)
+
+        return pw_fit, breakpoint
+
+    else:
+        return pw_fit, False
+
+def define_noise_wavenumber_piecewise(data_xr, plot_flag = False):
+
+    data_log = np.log(data_xr)
+
+    k = data_log.k.data
+    k_log = np.log(k)
+
+    pw_fit, breakpoint_log = get_breakingpoints(k_log, data_log.data)
+
+    # 'start'/'end' are string sentinels returned by get_correct_breakpoint;
+    # they must be compared with '==' (identity checks with 'is' are unreliable for strings)
+    if breakpoint_log == 'start':
+        print('no decay, set to lowest wavenumber')
+        breakpoint_log = k_log[0]
+    if (breakpoint_log == 'end') | (breakpoint_log is False):
+        print('no breakpoint found, set to highest wavenumber')
+        breakpoint_log = k_log[-1]
+
+    breakpoint_pos = abs(k_log - breakpoint_log).argmin()
+    breakpoint_k = k[breakpoint_pos]
+
+    #plot_flag= False
+    if plot_flag:
+        # plt.plot(np.log(d_grad.k), d_grad)
+        # plt.show()
+        pw_fit.plot()
+        #plt.plot(np.log(data_xr.k), np.log(data_xr))
+        plt.plot(k_log, data_log )
+        #plt.gca().set_xscale('log')
+        #plt.plot([np.log(breakpoint_k), np.log(breakpoint_k)], [-6, -5])
+        #print(k_end)
+
+    return breakpoint_k, pw_fit
+
+#G_gFT_smth.isel(x=7).plot()
+
+k_lim_list = list()
+k_end_previous = np.nan
+x = G_gFT_smth.x.data[0]
+k = G_gFT_smth.k.data
+
+for x in G_gFT_smth.x.data:
+    #x = G_gFT_smth.isel(x=9).x
+    #x= 237500.0
+    print(x)
+    # use displacement power spectrum
+    k_end, pw_fit = define_noise_wavenumber_piecewise(G_gFT_smth.sel(x=x)/k, plot_flag =False )
+    #pw_fit.get_results()
+    #pw_fit.n_breakpoints
+
+    #pw_fit.summary()
+    #k_end, slope = define_noise_wavenumber_piecewise(G_gFT_smth.sel(x=x), k_lead_peak, k_end_lim= k_end_0, plot_flag =True )
+    #k_end = define_noise_wavenumber_tresh_simple(G_gFT_smth.sel(x=x), k_lead_peak, k_end_lim= k_end_0, plot_flag =True )
+
+    # keep the previous estimate if the breakpoint collapsed onto the lowest wavenumber
+    k_save = k_end_previous if k_end == k[0] else k_end
+    #k_save = k_end_previous if k_end >= k[-1]*0.95 else k_end
+
+    #k_save = k_end_previous if k_end == k[-1] else k_end
+    k_end_previous = k_save #if k_end_0 is None else k_end_0
+    k_lim_list.append(k_save)
+
+    #k_save = np.nan if slope >= 0 else k_end
+    # plt.gca().axvline(np.log(k_save), linewidth= 2, color='red')
+    # plt.show()
+    print('--------------------------')
+# %%
+# write k limits to datasets
+# lanczos.lanczos_filter_1d(G_gFT_smth.x, 
k_lim_list, 2) +# lanczos.lanczos_filter_1d_wrapping + +font_for_pres() +G_gFT_smth.coords['k_lim'] = ('x', k_lim_list ) +G_gFT_smth.k_lim.plot() +#G_gFT_smth.k_lim.rolling(x=4, center=True, min_periods=1).median().plot() +k_lim_smth = G_gFT_smth.k_lim.rolling(x=3, center=True, min_periods=1).mean() +k_lim_smth.plot(c='r') + +plt.title('k_c filter', loc='left') +F.save_light(path=plot_path, name = str(ID_name)+ '_B06_atten_ov') + +G_gFT_smth['k_lim'] = k_lim_smth #G_gFT_smth.k_lim.rolling(x=3, center=True, min_periods=1).mean().plot(c='r').data +G_gFT_wmean.coords['k_lim'] = k_lim_smth #('x', k_lim_smth ) + + +# %% +font_for_print() + +fn = copy.copy(lstrings) +F = M.figure_axis_xy(fig_sizes['two_column'][0], fig_sizes['two_column'][0]* 0.9, container= True, view_scale =1) + + +plt.suptitle('Cut-off Frequency for Displacement Spectral\n' + io.ID_to_str(ID_name), y = 0.97) +gs = GridSpec(8,3, wspace=0.1, hspace=1.5)#figure=fig,# + +# +# #clev = M.clevels( [Gmean.quantile(0.6).data * 1e4, Gmean.quantile(0.99).data * 1e4], 31)/ 1e4 +# +k_lims = G_gFT_wmean.k_lim +xlims= G_gFT_wmean.k[0], G_gFT_wmean.k[-1] +# +k =high_beams[0] +for pos, k, pflag in zip([gs[0:2, 0],gs[0:2, 1],gs[0:2, 2] ], high_beams, [True, False, False] ): + ax0 = F.fig.add_subplot(pos) + Gplot = Gk.sel(beam = k).isel(x = slice(0, -1)).gFT_PSD_data.squeeze().rolling(k=20, x=2, min_periods= 1, center=True).mean() + #Gplot.plot() + + Gplot= Gplot.where(Gplot["N_per_stancil"] / Gplot["Lpoints"] >= 0.1)#.plot() + #Gplot.plot() + + + alpha_range= iter(np.linspace(1,0, Gplot.x.data.size)) + for x in Gplot.x.data: + ialpha =next(alpha_range) + plt.loglog(Gplot.k, Gplot.sel(x=x)/Gplot.k, linewidth = 0.5, color= col.rels[k], alpha= ialpha) + ax0.axvline(k_lims.sel(x=x), linewidth= 0.4, color= 'black', zorder= 0, alpha=ialpha) + + plt.title(next(fn) + k, color= col_dict[k], loc= 'left') + plt.xlim(xlims) + # + if pflag: + ax0.tick_params(labelbottom=False, bottom=True) + plt.ylabel("Power (m$^2$/k')") + plt.legend() + else: + ax0.tick_params(labelbottom=False, bottom=True, labelleft=False) + +for pos, k, pflag in zip([gs[2:4, 0],gs[2:4, 1],gs[2:4, 2] ], low_beams, [True, False, False] ): + ax0 = F.fig.add_subplot(pos) + Gplot = Gk.sel(beam = k).isel(x = slice(0, -1)).gFT_PSD_data.squeeze().rolling(k=20, x=2, min_periods= 1, center=True).mean() + #Gplot.mean('x').plot() + + Gplot= Gplot.where(Gplot["N_per_stancil"] / Gplot["Lpoints"] >= 0.1)#.plot() + + alpha_range= iter(np.linspace(1,0, Gplot.x.data.size)) + for x in Gplot.x.data: + ialpha =next(alpha_range) + plt.loglog(Gplot.k, Gplot.sel(x=x)/Gplot.k, linewidth = 0.5, color= col.rels[k], alpha= ialpha) + ax0.axvline(k_lims.sel(x=x), linewidth= 0.4, color= 'black', zorder= 0, alpha=ialpha) + + plt.title(next(fn) + k, color= col_dict[k], loc= 'left') + plt.xlim(xlims) + plt.xlabel("observed wavenumber k' ") + + # + if pflag: + ax0.tick_params( bottom=True) + plt.ylabel("Power (m$^2$/k')") + plt.legend() + else: + ax0.tick_params(bottom=True, labelleft=False) + +F.save_light(path=plot_path, name =str(ID_name) + '_B06_atten_ov_simple') +F.save_pup(path=plot_path, name = str(ID_name) + '_B06_atten_ov_simple') + +# % +pos = gs[5:, 0:2] +ax0 = F.fig.add_subplot(pos) + +lat_str = str(np.round( Gx.isel(x = 0).lat.mean().data, 2) ) +' to ' + str(np.round( Gx.isel(x = -1).lat.mean().data, 2) ) +plt.title(next(fn) + 'Mean Displacement Spectra\n(lat='+ lat_str +')', loc='left') + +dd = (10 * np.log( (G_gFT_smth/G_gFT_smth.k) .isel(x = slice(0, -1))))#.plot() +dd = dd.where(~np.isinf(dd), np.nan) 
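+# note: np.log above is the natural logarithm, so 10*np.log(...) is a log-power scaling
+# (not decibels, which would use 10*np.log10); it only compresses the dynamic range for
+# the pcolor plot below, whose color limits come from the 1% and 95% quantiles.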
+
+## filter out segments with less than 10% of data points
+dd= dd.where(G_gFT_smth["N_per_stancil_fraction"] >= 0.1)#.plot()
+
+dd_lims = np.round(dd.quantile(0.01).data*0.95, 0) , np.round(dd.quantile(0.95).data*1.05, 0)
+plt.pcolor(dd.x/1e3, dd.k, dd, vmin=dd_lims[0], vmax= dd_lims[-1], cmap = col.white_base_blgror)
+cb = plt.colorbar(orientation= 'vertical')
+
+cb.set_label('Power (m$^2$/k)')
+plt.plot( G_gFT_smth.isel(x = slice(0, -1)).x/1e3 , G_gFT_smth.isel(x = slice(0, -1)).k_lim , color= col.black, linewidth = 1)
+plt.ylabel('wavenumber k')
+plt.xlabel('X (km)')
+
+pos = gs[6:, -1]
+ax9 = F.fig.add_subplot(pos)
+
+plt.title('Data Coverage (%)', loc ='left')
+plt.plot(G_gFT_smth.x/1e3 , G_gFT_smth["N_per_stancil_fraction"]*100 , linewidth = 0.8, color = 'black')
+ax9.spines['left'].set_visible(False)
+ax9.spines['right'].set_visible(True)
+ax9.tick_params(labelright=True, right=True, labelleft=False, left=False)
+ax9.axhline(10, linewidth = 0.8, linestyle= '--', color ='black')
+#plt.ylabel('(%)')
+plt.xlabel('X (km)')
+
+
+F.save_light(path=plot_path, name =str(ID_name) + '_B06_atten_ov')
+F.save_pup(path=plot_path, name = str(ID_name) + '_B06_atten_ov')
+
+
+# %% reconstruct slope displacement data
+def fit_offset(x, data, model, nan_mask, deg):
+    # fit a polynomial (with its constant term removed) to the data-minus-model residual
+    #x, data, model, nan_mask, deg = dist_stencil, height_data, height_model, dist_nanmask, 1
+    p_offset = np.polyfit(x[~nan_mask], data[~nan_mask] - model[~nan_mask], deg)
+    p_offset[-1] = 0
+    poly_offset = np.polyval(p_offset,x )
+    return poly_offset
+
+def tanh_fitler(x, x_cutoff , sigma_g= 0.01):
+    """
+    Smooth low-pass taper: ~1 for x < x_cutoff, ~0 for x > x_cutoff,
+    with a transition width set by sigma_g.
+    """
+
+    decay = 0.5 - np.tanh( (x-x_cutoff)/sigma_g )/2
+    return decay
+
+
+#plt.plot(x, tanh_fitler(Gk_1.k, k_thresh, sigma_g= 0.003) )
+
+
+def reconstruct_displacement(Gx_1, Gk_1, T3, k_thresh):
+
+    """
+    reconstructs photon displacement heights for each stencil given the model parameters in Gk_1.
+    A low-pass wavenumber filter can be applied using k_thresh.
+
+    inputs:
+    Gk_1      model data per stencil from _gFT_k file with sin and cos coefficients
+    Gx_1      real data per stencil from _gFT_x file with mean photon heights and coordinate systems
+    T3        table of binned photon heights ('dist', 'heights_c_weighted_mean') for this beam
+    k_thresh  threshold wavenumber for the low-pass filter
+
+    returns:
+    height_model  reconstructed displacement heights of the stencil
+    poly_offset   fitted straight line to the residual between observations and model to account for low-pass variability (currently returned as np.nan)
+    nan_mask      mask of where observed data exist
+    """
+
+    dist_stencil = Gx_1.eta + Gx_1.x
+    dist_stencil_lims = dist_stencil[0].data, dist_stencil[-1].data
+
+    gFT_cos_coeff_sel = np.copy(Gk_1.gFT_cos_coeff)
+    gFT_sin_coeff_sel = np.copy(Gk_1.gFT_sin_coeff)
+
+    # smoothly damp coefficients above k_thresh instead of a hard cut-off
+    gFT_cos_coeff_sel = gFT_cos_coeff_sel*tanh_fitler(Gk_1.k, k_thresh, sigma_g= 0.003)
+    gFT_sin_coeff_sel = gFT_sin_coeff_sel*tanh_fitler(Gk_1.k, k_thresh, sigma_g= 0.003)
+
+    # gFT_cos_coeff_sel[Gk_1.k > k_thresh] = 0
+    # gFT_sin_coeff_sel[Gk_1.k > k_thresh] = 0
+
+
+    FT_int = gFT.generalized_Fourier(Gx_1.eta + Gx_1.x, None,Gk_1.k )
+    _ = FT_int.get_H()
+    FT_int.p_hat = np.concatenate([ -gFT_sin_coeff_sel /Gk_1.k, gFT_cos_coeff_sel/Gk_1.k ])
+
+    dx = Gx.eta.diff('eta').mean().data
+    height_model = FT_int.model() /dx# + T3['heights_c_weighted_mean'].iloc[0]
+
+    dist_nanmask = np.isnan(Gx_1.y_data)
+    height_data = np.interp(dist_stencil, T3['dist'], T3['heights_c_weighted_mean']) #[~np.isnan(Gx_1.y_data)]
+    #poly_offset = fit_offset(dist_stencil, height_data, height_model, dist_nanmask, 1)
+
+    return height_model, np.nan, dist_nanmask
+
+# cutting Table data
+
+
+# %% 
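+# --- illustrative sketch (added for clarity, not part of the original workflow) ---
+# tanh_fitler above acts as a smooth low-pass taper on the gFT coefficients: it is ~1
+# below the cut-off wavenumber and rolls off to ~0 above it over a width of order sigma_g.
+# The wavenumber axis and cut-off used here are hypothetical demo values only.
+_k_demo = np.linspace(0.005, 0.1, 200)                    # hypothetical wavenumber axis
+_taper_demo = tanh_fitler(_k_demo, 0.03, sigma_g=0.003)   # ~1 below k=0.03, ~0 above
+# multiplying a coefficient vector by this taper suppresses it smoothly above the cut-off,
+# which is how reconstruct_displacement applies the filter to the gFT sin/cos coefficients.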
+G_height_model=dict() +k = 'gt2l' +for bb in Gx.beam.data: + G_height_model_temp= dict() + for i in np.arange(Gx.x.size): + #k_thresh= 4 + + Gx_1 = Gx.isel(x= i).sel(beam = bb) + Gk_1 = Gk.isel(x= i).sel(beam = bb) + k_thresh= G_gFT_smth.k_lim.isel(x=0).data + + + dist_stencil = Gx_1.eta + Gx_1.x + dist_stencil_lims = dist_stencil[0].data, dist_stencil[-1].data + dist_stencil_lims_plot = dist_stencil_lims#Gx_1.eta[0]*0.25 + Gx_1.x, Gx_1.eta[-1]*0.25 + Gx_1.x + dist_stencil_lims_plot = Gx_1.eta[0]*1 + Gx_1.x, Gx_1.eta[-1]*1 + Gx_1.x + + T3_sel = B3[k].loc[( (B3[k]['dist'] >= dist_stencil_lims[0]) & (B3[k]['dist'] <= dist_stencil_lims[1]) )] + #T2_sel = B2[k].loc[( B2[k]['x_true'] >= T3_sel['x_true'].min() ) & ( B2[k]['x_true'] <= T3_sel['x_true'].max() )] + + if T3_sel.shape[0] != 0: + # if T3_sel['x_true'].iloc[-1] < T3_sel['x_true'].iloc[0]: + # dist_T2_temp =np.interp(T2_sel['x_true'][::-1], T3_sel['x_true'][::-1], T3_sel['dist'][::-1] ) + # T2_sel['dist'] = dist_T2_temp[::-1] + # else: + # dist_T2_temp =np.interp(T2_sel['x_true'], T3_sel['x_true'], T3_sel['dist'] ) + # T2_sel['dist'] = dist_T2_temp + + height_model, poly_offset, dist_nanmask = reconstruct_displacement(Gx_1, Gk_1, T3_sel, k_thresh = k_thresh) + poly_offset = poly_offset*0 + G_height_model_temp[str(i) + bb] = xr.DataArray(height_model, coords=Gx_1.coords, dims= Gx_1.dims, name = 'height_model' ) + else: + G_height_model_temp[str(i) + bb] = xr.DataArray(Gx_1.y_model.data, coords=Gx_1.coords, dims= Gx_1.dims, name = 'height_model' ) + + #G_height_nans[i] = xr.DataArray(dist_nanmask, coords=Gx_1.coords, dims= Gx_1.dims, name = 'nanmask' ) + + # jsut for plotting: + # # corrected rar Photon heights + # T2_sel['heights_c_residual'] = photon_height_residual = T2_sel['heights_c'] - np.interp(T2_sel['dist'], dist_stencil, height_model + poly_offset) + # + # # interpolate rebinned photon heights + # heights_c_weighted_mean_stancil = np.interp(dist_stencil, T3_sel['dist'], T3_sel['heights_c_weighted_mean'] ) + # + # # corrected rebinned photon heights + # photon_height_residual_mean = heights_c_weighted_mean_stancil - (height_model + poly_offset) + # photon_height_residual_mean[dist_nanmask] = np.nan + # T3_sel['heights_c_weighted_mean_residual'] = T3_sel['heights_c_weighted_mean'] - np.interp(T3_sel['dist'], dist_stencil, height_model + poly_offset ) + + #plot + # font_for_pres() + # M.figure_axis_xy(5.5, 6, view_scale = 0.8) + # + # plt.subplot(3,1 ,1) + # plt.scatter(T2_sel['dist'], T2_sel['heights_c'], s= 1, marker='o', color='black', alpha =0.2, edgecolors= 'none' ) + # #plt.scatter(T3_sel['dist'], T3_sel['heights_c_weighted_mean'], s= 1, marker='o', color='black', alpha =0.2, edgecolors= 'none' ) + # plt.plot(T3_sel['dist'], T3_sel['heights_c_weighted_mean'] , color =col.rascade1, linewidth = 0.5, label = 'residual $h_c$') + # plt.xlim(dist_stencil_lims_plot) + # plt.ylim(0, 1.5) + # + # ax1 = plt.subplot(3,1 ,2) + # plt.plot(dist_stencil, height_model + poly_offset ,'-', c='red', linewidth=0.8, alpha=1,zorder= 12, label = 'GFT height model + correction') + # plt.plot(dist_stencil, height_model ,'-', c='orange', linewidth=0.8, alpha=0.5,zorder= 2, label = 'GFT height model') + # plt.legend(loc = 1) + # plt.xlim(dist_stencil_lims_plot) + # ax1.axhline(0, linewidth=0.5, color= 'black') + # + # plt.subplot(3,1 ,3) + # plt.scatter(T2_sel['dist'], T2_sel['heights_c_residual'], s= 1, marker='o', color='black', alpha =0.5, edgecolors= 'none', zorder=6 ) + # #plt.scatter(T2_sel['dist'], T2_sel['heights_c_residual'], s= 1, 
marker='o', color='black', alpha =1, edgecolors= 'none' ) + # + # plt.plot(T3_sel['dist'], T3_sel['heights_c_weighted_mean_residual'],'-', c=col.rascade2, linewidth=0.5, alpha=1, zorder= 10, label = 'GFT height model + correction') + # #plt.plot(dist_stencil, photon_height_residual_mean,'-', c='red', linewidth=0.3, alpha=1, zorder= 2, label = 'GFT height model + correction') + # plt.fill_between(dist_stencil , photon_height_residual_mean, color= col.cascade2, edgecolor = None, alpha = 1, zorder= 0) + # + # plt.xlim(dist_stencil_lims_plot) + # plt.ylim(0, 1.5) + + G_height_model[bb] = xr.concat(G_height_model_temp.values(), dim= 'x').T + +Gx['height_model'] = xr.concat(G_height_model.values(), dim= 'beam').transpose('eta', 'beam', 'x') + +# %% +Gx_v2, B2_v2, B3_v2 = dict(), dict(), dict() +for bb in Gx.beam.data: + print(bb) + Gx_k = Gx.sel(beam = bb) + #Gx_k['height_model'] = xr.concat(G_height_model.values(), dim= 'x').T#.plot() + Gh = Gx['height_model'].sel(beam = bb).T + Gh_err = Gx_k['model_error_x'].T + Gnans = np.isnan(Gx_k.y_model) + + concented_heights = Gh.data.reshape(Gh.data.size) + concented_err = Gh_err.data.reshape(Gh.data.size) + concented_nans = Gnans.data.reshape(Gnans.data.size) + concented_x = (Gh.x+Gh.eta).data.reshape(Gh.data.size) + + dx = Gh.eta.diff('eta')[0].data + continous_x_grid = np.arange(concented_x.min(), concented_x.max(), dx) + continous_height_model = np.interp(continous_x_grid, concented_x, concented_heights ) + concented_err = np.interp(continous_x_grid, concented_x, concented_err ) + continous_nans = np.interp(continous_x_grid, concented_x, concented_nans ) ==1 + + T3 = B3[bb]#.loc[( (B3[k]['dist'] >= dist_stencil_lims[0]) & (B3[k]['dist'] <= dist_stencil_lims[1]) )] + #T2 = B2[bb]#.loc[( B2[k]['x_true'] >= T3_sel['x_true'].min() ) & ( B2[k]['x_true'] <= T3_sel['x_true'].max() )] + + #T2 = T2.sort_values('x_true') + T3 = T3.sort_values('x') + #T2['dist'] = np.interp(T2['x_true'], T3['x_true'], T3['dist'] ) + #T2 = T2.sort_values('dist') + T3 = T3.sort_values('dist') + + #T2 = T2.sort_index() + #T2['dist'] = np.interp(T2['x_true'], T3['x_true'], T3['dist'] ) + + T3['heights_c_model'] = np.interp(T3['dist'], continous_x_grid, continous_height_model) + T3['heights_c_model_err'] = np.interp(T3['dist'], continous_x_grid, concented_err) + T3['heights_c_residual'] = T3['heights_c_weighted_mean'] - T3['heights_c_model'] + + #T2['heights_c_model'] = np.interp(T2['dist'], continous_x_grid, continous_height_model) + #T2['heights_c_residual'] = T2['heights_c'] - T2['heights_c_model'] + + + #B2_v2[bb] = T2 + B3_v2[bb] = T3 + Gx_v2[bb] = Gx_k + + # font_for_print() + # F = M.figure_axis_xy(6, 2, view_scale= 0.7) + # + # plt.plot(T2['dist'] , T2['heights_c']+2,'ok', markersize=0.8, alpha=0.5, label='org photon height_c') + # plt.plot(T3['dist'] , T3['heights_c_weighted_mean']+2,'.r', markersize=1, alpha=0.5, label='org photon wmean') + # + # plt.plot(T2['dist'] , T2['heights_c_model'], '.', markersize=1, alpha=0.8, label='height model', color=col.orange, zorder= 12) + # F.ax.axhline(2, linewidth = .7, color= 'black') + # F.ax.axhline(0, linewidth = .7, color= 'black') + # F.ax.axhline(-2, linewidth = .7, color= 'black') + # + # plt.plot(T2['dist'] , T2['heights_c_residual']-2,'ob', markersize=0.5, alpha=0.5, label='residual photons') + # plt.plot(T3['dist'], T3['heights_c_residual']-2 , 'r', linewidth= 0.8, zorder=12, label='photon height_c resodual') + # + # xlims = np.nanmean(T2['dist']), np.nanmean(T2['dist'])+7e3 + # plt.xlim(xlims) + # dlim = 
np.nanmax(T3['heights_c_residual'][(T3['dist']> xlims[0]) & (T3['dist'] < xlims[1])]) + # #plt.ylim(-dlim*1.5, dlim*1.5) + # try: + # plt.ylim((-2-1.5*dlim), 2+1.5*dlim) + # except: + # plt.ylim(-5, 5) + # plt.legend( ncol= 4) + #F.save_light(path = plot_path , name = 'B06_'+bb+'__check') + + +# %% correct wave incident direction + +#load_path = mconfig['paths']['work'] + '/B04_angle/' + +try: + G_angle = xr.open_dataset(load_path_angle+ '/B05_'+ID_name + '_angle_pdf.nc' ) + + font_for_pres() + + Ga_abs = (G_angle.weighted_angle_PDF_smth.isel(angle = G_angle.angle > 0).data + G_angle.weighted_angle_PDF_smth.isel(angle = G_angle.angle < 0).data[:,::-1])/2 + Ga_abs = xr.DataArray(data=Ga_abs.T, dims = G_angle.dims, coords=G_angle.isel(angle = G_angle.angle > 0).coords) + + Ga_abs_front = Ga_abs.isel(x= slice(0, 3)) + Ga_best = (( Ga_abs_front * Ga_abs_front.N_data ).sum('x')/Ga_abs_front.N_data.sum('x')) + + theta = Ga_best.angle[Ga_best.argmax()].data + theta_flag = True + + font_for_print() + F = M.figure_axis_xy(3, 5, view_scale= 0.7) + + plt.subplot(2, 1, 1) + plt.pcolor(Ga_abs) + plt.xlabel('abs angle') + plt.ylabel('x') + + ax = plt.subplot(2, 1, 2) + Ga_best.plot() + plt.title('angle front ' + str(theta*180/np.pi), loc='left') + ax.axvline(theta, color= 'red') + F.save_light(path = plot_path , name = 'B06_angle_def') +except: + + print('no angle data found, skip angle corretion') + theta= 0 + theta_flag = False + +# %% +lam_p = 2 *np.pi/Gk.k +lam = lam_p * np.cos(theta) + +if theta_flag: + k_corrected = 2 * np.pi/lam + x_corrected = Gk.x * np.cos(theta) +else: + k_corrected = 2 * np.pi/lam *np.nan + x_corrected = Gk.x * np.cos(theta) *np.nan + +# %% spectral save +G5 = G_gFT_wmean.expand_dims(dim = 'beam', axis = 1) +G5.coords['beam'] = ['weighted_mean']#(('beam'), 'weighted_mean') +G5 = G5.assign_coords(N_photons= G5.N_photons) +G5['N_photons'] = G5['N_photons'].expand_dims('beam') +G5['N_per_stancil_fraction'] = G5['N_per_stancil_fraction'].expand_dims('beam') + +Gk_v2 = xr.merge([Gk, G5]) + +Gk_v2 = Gk_v2.assign_coords(x_corrected=("x", x_corrected.data)).assign_coords(k_corrected=("k", k_corrected.data)) + +Gk_v2.attrs['best_guess_incident_angle'] = theta + +# save collected spectral data +Gk_v2.to_netcdf(save_path+'/B06_'+ID_name + '_gFT_k_corrected.nc' ) +Gx +# %% save real space data +Gx.to_netcdf(save_path+'/B06_'+ID_name + '_gFT_x_corrected.nc' ) +try: + io.save_pandas_table(B2_v2, 'B06_' +ID_name + '_B06_corrected_resid' , save_path) # all photos but heights adjusted and with distance coordinate +except: + os.remove(save_path+'B06_' +ID_name + '_B06_corrected_resid.h5') + io.save_pandas_table(B2_v2, 'B06_' +ID_name + '_B06_corrected_resid' , save_path) # all photos but heights adjusted and with distance coordinate + +try: + io.save_pandas_table(B3_v2, 'B06_' +ID_name + '_binned_resid' , save_path) # regridding heights +except: + os.remove(save_path+'B06_' +ID_name + '_binned_resid.h5') + io.save_pandas_table(B3_v2, 'B06_' +ID_name + '_binned_resid' , save_path) # regridding heights + +MT.json_save('B06_success', plot_path + '../', {'time':time.asctime( time.localtime(time.time()) )}) +print('done. 
saved target at ' + plot_path + '../B06_success' ) + +# %% \ No newline at end of file From 45cc08110262172169e5345f18870be5c336bd44 Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Thu, 1 Feb 2024 08:39:41 -0500 Subject: [PATCH 17/30] moved B06_correct_separate_var.py to src folder --- analysis_db/B06_correct_separate_var.py | 852 ------------------ .../analysis_db/B06_correct_separate_var.py | 0 2 files changed, 852 deletions(-) delete mode 100644 analysis_db/B06_correct_separate_var.py create mode 100644 src/icesat2_tracks/analysis_db/B06_correct_separate_var.py diff --git a/analysis_db/B06_correct_separate_var.py b/analysis_db/B06_correct_separate_var.py deleted file mode 100644 index e0a6d53f..00000000 --- a/analysis_db/B06_correct_separate_var.py +++ /dev/null @@ -1,852 +0,0 @@ -# %% -import os, sys -#execfile(os.environ['PYTHONSTARTUP']) - -""" -This file open a ICEsat2 track applied filters and corections and returns smoothed photon heights on a regular grid in an .nc file. -This is python 3 -""" - -exec(open(os.environ['PYTHONSTARTUP']).read()) -exec(open(STARTUP_2021_IceSAT2).read()) - -#%matplotlib inline - -import ICEsat2_SI_tools.convert_GPS_time as cGPS -import h5py -import ICEsat2_SI_tools.io as io -import ICEsat2_SI_tools.spectral_estimates as spec -import ICEsat2_SI_tools.lanczos as lanczos -import time -import imp -import copy -import spicke_remover -import datetime -import generalized_FT as gFT -from scipy.ndimage.measurements import label - -xr.set_options(display_style='text') -#import s3fs -# %% -ID_name, batch_key, test_flag = io.init_from_input(sys.argv) # loads standard experiment -#ID_name, batch_key, test_flag = '20190605061807_10380310_004_01', 'SH_batch01', False -#ID_name, batch_key, test_flag = '20190601094826_09790312_004_01', 'SH_batch01', False -#ID_name, batch_key, test_flag = '20190207111114_06260210_004_01', 'SH_batch02', False -#ID_name, batch_key, test_flag = '20190208152826_06440210_004_01', 'SH_batch01', False -#ID_name, batch_key, test_flag = '20190213133330_07190212_004_01', 'SH_batch02', False -#ID_name, batch_key, test_flag = '20190207002436_06190212_004_01', 'SH_batch02', False -#ID_name, batch_key, test_flag = '20190206022433_06050212_004_01', 'SH_batch02', False - -#ID_name, batch_key, test_flag = '20190219073735_08070210_004_01', 'SH_batch02', False -#ID_name, batch_key, test_flag = '20190502021224_05160312_004_01', 'SH_batch02', False - -#ID_name, batch_key, test_flag = 'SH_20190208_06440212', 'SH_publish', True -#ID_name, batch_key, test_flag = 'SH_20190219_08070210', 'SH_publish', True -#ID_name, batch_key, test_flag = 'SH_20190502_05160312', 'SH_publish', True - -#ID_name, batch_key, test_flag = 'NH_20190311_11200203', 'NH_batch06', True -#ID_name, batch_key, test_flag = 'NH_20210312_11961005', 'NH_batch07', True - -#ID_name, batch_key , test_flag = 'SH_20190502_05180312', 'SH_testSLsinglefile2' , True - -#print(ID_name, batch_key, test_flag) -hemis, batch = batch_key.split('_') - -all_beams = mconfig['beams']['all_beams'] -high_beams = mconfig['beams']['high_beams'] -low_beams = mconfig['beams']['low_beams'] - -load_path_work = mconfig['paths']['work'] +'/'+ batch_key +'/' -B3_hdf5 = h5py.File(load_path_work +'B01_regrid'+'/'+ID_name + '_B01_binned.h5', 'r') - - -load_path_angle = mconfig['paths']['work'] +'/'+ batch_key +'/B04_angle/' - -B3 = dict() -for b in all_beams: - B3[b] = io.get_beam_hdf_store(B3_hdf5[b]) - -B3_hdf5.close() - -# B2 = io.load_pandas_table_dict(ID_name + '_B01_regridded' , load_path1) # rhis is the rar photon 
data -# B3 = io.load_pandas_table_dict(ID_name + '_B01_binned' , load_path1) # - -load_file = load_path_work +'/B02_spectra/' + 'B02_' + ID_name #+ '.nc' -Gk = xr.open_dataset(load_file+'_gFT_k.nc') -Gx = xr.open_dataset(load_file+'_gFT_x.nc') -Gfft = xr.open_dataset(load_file+'_FFT.nc') - - -#plot_path = mconfig['paths']['plot'] + '/'+hemis+'/'+batch_key+'/' + ID_name + '/' -plot_path = mconfig['paths']['plot'] + '/'+hemis+'/'+batch_key+'/' + ID_name + '/B06_correction/' -MT.mkdirs_r(plot_path) - -save_path = mconfig['paths']['work'] +batch_key+'/B06_corrected_separated/' -MT.mkdirs_r(save_path) - - -# %% - -#Gfilt = io.load_pandas_table_dict(ID_name + '_B01_regridded', load_path) # rhis is the rar photon data -#Gd = io.load_pandas_table_dict(ID_name + '_B01_binned' , load_path) # - -col.colormaps2(31, gamma=1) -col_dict= col.rels - - -# %% -def dict_weighted_mean(Gdict, weight_key): - """ - returns the weighted meean of a dict of xarray, data_arrays - weight_key must be in the xr.DataArrays - """ - #Gdict = G_rar_fft - #weight_key='N_per_stancil' - - akey = list( Gdict.keys() )[0] - GSUM = Gdict[akey].copy() - GSUM.data = np.zeros(GSUM.shape) - N_per_stancil = GSUM.N_per_stancil * 0 - N_photons = np.zeros(GSUM.N_per_stancil.size) - - counter= 0 - for k,I in Gdict.items(): - #print(k) - I =I.squeeze() - print(len(I.x) ) - if len(I.x) !=0: - GSUM += I.where( ~np.isnan(I), 0) * I[weight_key] #.sel(x=GSUM.x) - N_per_stancil += I[weight_key] - if 'N_photons' in GSUM.coords: - N_photons += I['N_photons'] - counter+=1 - - GSUM = GSUM / N_per_stancil - - if 'N_photons' in GSUM.coords: - GSUM.coords['N_photons'] = (('x', 'beam'), np.expand_dims(N_photons, 1) ) - - GSUM['beam'] = ['weighted_mean'] - GSUM.name='power_spec' - - return GSUM - - -#G_gFT_wmean = (Gk['gFT_PSD_data'].where( ~np.isnan(Gk['gFT_PSD_data']), 0) * Gk['N_per_stancil']).sum('beam')/ Gk['N_per_stancil'].sum('beam') - -G_gFT_wmean = (Gk.where( ~np.isnan(Gk['gFT_PSD_data']), 0) * Gk['N_per_stancil']).sum('beam')/ Gk['N_per_stancil'].sum('beam') -G_gFT_wmean['N_photons'] = Gk['N_photons'].sum('beam') - -G_fft_wmean = (Gfft.where( ~np.isnan(Gfft), 0) * Gfft['N_per_stancil']).sum('beam')/ Gfft['N_per_stancil'].sum('beam') -G_fft_wmean['N_per_stancil'] = Gfft['N_per_stancil'].sum('beam') - - -# %% plot - -# derive spectral errors: -Lpoints= Gk.Lpoints.mean('beam').data -N_per_stancil = Gk.N_per_stancil.mean('beam').data#[0:-2] - -G_error_model =dict() -G_error_data =dict() - -for bb in Gk.beam.data: - I = Gk.sel(beam= bb) - b_bat_error = np.concatenate([ I.model_error_k_cos.data , I.model_error_k_sin.data ]) - Z_error = gFT.complex_represenation(b_bat_error, Gk.k.size, Lpoints) - PSD_error_data, PSD_error_model = gFT.Z_to_power_gFT(Z_error, np.diff(Gk.k)[0],N_per_stancil , Lpoints ) - - #np.expand_dims(PSD_error_model, axis =) - G_error_model[bb] = xr.DataArray(data = PSD_error_model, coords = I.drop('N_per_stancil').coords, name='gFT_PSD_data_error' ).expand_dims('beam') - G_error_data[bb] = xr.DataArray(data = PSD_error_data, coords = I.drop('N_per_stancil').coords, name='gFT_PSD_data_error' ).expand_dims('beam') - -gFT_PSD_data_error_mean = xr.concat(G_error_model.values(), dim='beam') -gFT_PSD_data_error_mean = xr.concat(G_error_data.values(), dim='beam') - -gFT_PSD_data_error_mean = ( gFT_PSD_data_error_mean.where( ~np.isnan(gFT_PSD_data_error_mean), 0) * Gk['N_per_stancil']).sum('beam')/Gk['N_per_stancil'].sum('beam') -gFT_PSD_data_error_mean = ( gFT_PSD_data_error_mean.where( ~np.isnan(gFT_PSD_data_error_mean), 0) * 
Gk['N_per_stancil']).sum('beam')/Gk['N_per_stancil'].sum('beam') - -G_gFT_wmean['gFT_PSD_data_err'] = gFT_PSD_data_error_mean -G_gFT_wmean['gFT_PSD_data_err'] = gFT_PSD_data_error_mean - -Gk['gFT_PSD_data_err'] = xr.concat(G_error_model.values(), dim='beam') -Gk['gFT_PSD_data_err'] = xr.concat(G_error_data.values(), dim='beam') - - -# %% - -G_gFT_smth = G_gFT_wmean['gFT_PSD_data'].rolling(k=30, center=True, min_periods=1).mean() -G_gFT_smth['N_photons'] = G_gFT_wmean.N_photons -G_gFT_smth["N_per_stancil_fraction"] = Gk['N_per_stancil'].T.mean('beam')/Gk.Lpoints.mean('beam') - -k = G_gFT_smth.k - -# %% -# GG_no_nan = G_gFT_smth.isel( x = ~np.isnan(G_gFT_smth.mean('k')) ) -# k_lead_peak = GG_no_nan.k[GG_no_nan.isel(x=0).argmax().data].data -# if k_lead_peak== k[0].data or k_lead_peak == k[-1].data: -# #raise ValueError('wavenumber Peak on Boundary!') -# print('wavenumber Peak on Boundary!') -# MT.json_save('B06_fail', plot_path+'../', {'time':time.asctime( time.localtime(time.time()) ) , 'reason': 'wavenumber Peak on Boundary!'}) -# print('exit()') -# #exit() -# -# # %% -# k_lims =0.01 -# k_span = [k_lead_peak- k_lims , k_lead_peak, k_lead_peak+ k_lims] - -F = M.figure_axis_xy() -#plt.loglog(k, k**(-2)) -# plt.loglog(k, 1e-4 *k**(-2)) -# plt.loglog(k, 1e-5 *k**(-3)) - -# F.ax.axvline(k_span[0]) -# F.ax.axvline(k_span[1]) -# F.ax.axvline(k_span[2]) -#plt.plot(np.log(k), np.log( k**(-3) ) ) -#plt.loglog(k, (k)**(-3) - 1e5) - -plt.loglog(k, G_gFT_smth/k) -# dd= dd.where(~np.isinf(dd), np.nan ) -#plt.grid() -plt.title('displacement power Spectra', loc='left') - -# %% -def define_noise_wavenumber_tresh_simple(data_xr, k_peak, k_end_lim =None, plot_flag = False): - - """ - returns noise wavenumber on the high end of a spectral peak. This method fits a straight line in loglog speace using robust regression. - The noise level is defined as the wavenumber at which the residual error of a linear fit to the data is minimal. 
- - inputs: - data_xr xarray.Dataarray with the power spectra with k as dimension - k_peak wavenumber above which the searh should start - dk the intervall over which the regrssion is repeated - - returns: - k_end the wavenumber at which the spectrum flattens - m slope of the fitted line - b intersect of the fitted line - """ - #data_xr, k_peak = G_gFT_smth.isel(x=0), k_lead_peak - #k_end_lim = None# - #k_end_lim= 0.06396283#0.0224938*1.05 - from scipy.ndimage.measurements import label - - if k_end_lim is None: - k_end_lim =data_xr.k[-1] - - k_lead_peak_margin = k_peak *1.05 - try: - data_log = np.log(data_xr).isel(k =(data_xr.k > k_lead_peak_margin)).rolling(k =10, center=True, min_periods=1).mean() - - except: - data_log = np.log(data_xr).isel(k =(data_xr.k > k_lead_peak_margin/2)).rolling(k =10, center=True, min_periods=1).mean() - - k_log= np.log(data_log.k) - try: - d_grad = data_log.differentiate('k').rolling(k =40, center=True, min_periods=4).mean() - except: - d_grad = data_log.differentiate('k').rolling(k =20, center=True, min_periods=2).mean() - ll = label( d_grad >=-5 ) - - #test if plausible minium exist: - # #print(ll[0][d_grad.k <= k_end_lim] ) - # if sum( ll[0][d_grad.k <= k_end_lim] ==0) == 0: - # #print(sum( ll[0][d_grad.k <= k_end_lim] ==0) == 0) - # print('no gradient in range, set to peak') - # return k_peak - - if ll[0][0] !=0: - #print(sum( ll[0][d_grad.k <= k_end_lim] ==0) == 0) - print('no decay, set to peak') - return k_peak - - if sum(ll[0]) == 0: - k_end = d_grad.k[-1] - else: - k_end = d_grad.k[(ll[0] == 1) ][0].data - - if plot_flag: - # plt.plot(np.log(d_grad.k), d_grad) - # plt.show() - plt.plot(np.log(data_xr.k), np.log(data_xr)) - plt.plot(k_log, data_log ) - plt.plot([np.log(k_end), np.log(k_end)], [-6, -5]) - #print(k_end) - return k_end - - - -# %% new version -def get_correct_breakpoint(pw_results): - br_points = list() - for i in pw_results.keys(): - [br_points.append(i) if 'breakpoint' in i else None] - br_points_df = pw_results[br_points] - br_points_sorted = br_points_df.sort_values() - - alphas_sorted = [i.replace('breakpoint', 'alpha') for i in br_points_df.sort_values().index] - alphas_sorted.append('alpha'+ str(len(alphas_sorted)+1) ) - - - betas_sorted = [i.replace('breakpoint', 'beta') for i in br_points_df.sort_values().index] - - #betas_sorted - alphas_v2 = list() - alpha_i = pw_results['alpha1'] - for i in [0] + list(pw_results[betas_sorted]): - alpha_i += i - alphas_v2.append(alpha_i) - - alphas_v2_sorted = pd.Series(index = alphas_sorted, data =alphas_v2) - br_points_sorted['breakpoint'+ str(br_points_sorted.size+1)] = 'end' - - print('all alphas') - print(alphas_v2_sorted) - slope_mask = alphas_v2_sorted < 0 - - if sum(slope_mask) ==0: - print('no negative slope found, set to lowest') - breakpoint = 'start' - else: - - # take steepest slope - alpah_v2_sub = alphas_v2_sorted[slope_mask] - print(alpah_v2_sub) - print(alpah_v2_sub.argmin()) - break_point_name = alpah_v2_sub.index[alpah_v2_sub.argmin()].replace('alpha', 'breakpoint') - - # take first slope - #break_point_name = alphas_v2_sorted[slope_mask].index[0].replace('alpha', 'breakpoint') - breakpoint = br_points_sorted[break_point_name] - - return breakpoint - -def get_breakingpoints(xx, dd): - - import piecewise_regression - x2, y2 = xx, dd - convergence_flag =True - n_breakpoints= 3 - while convergence_flag: - pw_fit = piecewise_regression.Fit(x2, y2, n_breakpoints=n_breakpoints) - print('n_breakpoints', n_breakpoints, pw_fit.get_results()['converged']) - convergence_flag = not 
pw_fit.get_results()['converged'] - n_breakpoints += 1 - if n_breakpoints >=4: - convergence_flag = False - - pw_results = pw_fit.get_results() - #pw_fit.summary() - - if pw_results['converged']: - # if pw_results['estimates']['alpha1']['estimate'] < 0: - # print('decay at the front') - # print('n_breakpoints',pw_fit.n_breakpoints ) - - pw_results_df = pd.DataFrame(pw_results['estimates']).loc['estimate'] - - breakpoint = get_correct_breakpoint(pw_results_df) - - return pw_fit, breakpoint - - else: - return pw_fit, False - -def define_noise_wavenumber_piecewise(data_xr, plot_flag = False): - - data_log = data_xr - data_log = np.log(data_xr) - - k =data_log.k.data - k_log= np.log(k) - - pw_fit, breakpoint_log = get_breakingpoints(k_log, data_log.data) - - if breakpoint_log is 'start': - print('no decay, set to lowerst wavenumber') - breakpoint_log = k_log[0] - if (breakpoint_log is 'end') | (breakpoint_log is False) : - print('higest wavenumner') - breakpoint_log = k_log[-1] - - breakpoint_pos = abs(k_log -breakpoint_log).argmin() - breakpoint_k = k[breakpoint_pos] - - #plot_flag= False - if plot_flag: - # plt.plot(np.log(d_grad.k), d_grad) - # plt.show() - pw_fit.plot() - #plt.plot(np.log(data_xr.k), np.log(data_xr)) - plt.plot(k_log, data_log ) - #plt.gca().set_xscale('log') - #plt.plot([np.log(breakpoint_k), np.log(breakpoint_k)], [-6, -5]) - #print(k_end) - - return breakpoint_k, pw_fit - -#G_gFT_smth.isel(x=7).plot() - -k_lim_list = list() -k_end_previous = np.nan -x = G_gFT_smth.x.data[0] -k = G_gFT_smth.k.data - -for x in G_gFT_smth.x.data: - #x = G_gFT_smth.isel(x=9).x - #x= 237500.0 - print(x) - # use displacement power spectrum - k_end, pw_fit = define_noise_wavenumber_piecewise(G_gFT_smth.sel(x=x)/k, plot_flag =False ) - #pw_fit.get_results() - #pw_fit.n_breakpoints - - #pw_fit.summary() - #k_end, slope = define_noise_wavenumber_piecewise(G_gFT_smth.sel(x=x), k_lead_peak, k_end_lim= k_end_0, plot_flag =True ) - #k_end = define_noise_wavenumber_tresh_simple(G_gFT_smth.sel(x=x), k_lead_peak, k_end_lim= k_end_0, plot_flag =True ) - - - k_save = k_end_previous if k_end == k[0] else k_end - #k_save = k_end_previous if k_end >= k[-1]*0.95 else k_end - - #k_save = k_end_previous if k_end == k[-1] else k_end - k_end_previous = k_save #if k_end_0 is None else k_end_0 - k_lim_list.append(k_save) - - #k_save = np.nan if slope >= 0 else k_end - # plt.gca().axvline(np.log(k_save), linewidth= 2, color='red') - # plt.show() - print('--------------------------') -# %% -# write k limits to datasets -# lanczos.lanczos_filter_1d(G_gFT_smth.x, k_lim_list, 2) -# lanczos.lanczos_filter_1d_wrapping - -font_for_pres() -G_gFT_smth.coords['k_lim'] = ('x', k_lim_list ) -G_gFT_smth.k_lim.plot() -#G_gFT_smth.k_lim.rolling(x=4, center=True, min_periods=1).median().plot() -k_lim_smth = G_gFT_smth.k_lim.rolling(x=3, center=True, min_periods=1).mean() -k_lim_smth.plot(c='r') - -plt.title('k_c filter', loc='left') -F.save_light(path=plot_path, name = str(ID_name)+ '_B06_atten_ov') - -G_gFT_smth['k_lim'] = k_lim_smth #G_gFT_smth.k_lim.rolling(x=3, center=True, min_periods=1).mean().plot(c='r').data -G_gFT_wmean.coords['k_lim'] = k_lim_smth #('x', k_lim_smth ) - - -# %% -font_for_print() - -fn = copy.copy(lstrings) -F = M.figure_axis_xy(fig_sizes['two_column'][0], fig_sizes['two_column'][0]* 0.9, container= True, view_scale =1) - - -plt.suptitle('Cut-off Frequency for Displacement Spectral\n' + io.ID_to_str(ID_name), y = 0.97) -gs = GridSpec(8,3, wspace=0.1, hspace=1.5)#figure=fig,# - -# -# #clev = M.clevels( 
[Gmean.quantile(0.6).data * 1e4, Gmean.quantile(0.99).data * 1e4], 31)/ 1e4 -# -k_lims = G_gFT_wmean.k_lim -xlims= G_gFT_wmean.k[0], G_gFT_wmean.k[-1] -# -k =high_beams[0] -for pos, k, pflag in zip([gs[0:2, 0],gs[0:2, 1],gs[0:2, 2] ], high_beams, [True, False, False] ): - ax0 = F.fig.add_subplot(pos) - Gplot = Gk.sel(beam = k).isel(x = slice(0, -1)).gFT_PSD_data.squeeze().rolling(k=20, x=2, min_periods= 1, center=True).mean() - #Gplot.plot() - - Gplot= Gplot.where(Gplot["N_per_stancil"] / Gplot["Lpoints"] >= 0.1)#.plot() - #Gplot.plot() - - - alpha_range= iter(np.linspace(1,0, Gplot.x.data.size)) - for x in Gplot.x.data: - ialpha =next(alpha_range) - plt.loglog(Gplot.k, Gplot.sel(x=x)/Gplot.k, linewidth = 0.5, color= col.rels[k], alpha= ialpha) - ax0.axvline(k_lims.sel(x=x), linewidth= 0.4, color= 'black', zorder= 0, alpha=ialpha) - - plt.title(next(fn) + k, color= col_dict[k], loc= 'left') - plt.xlim(xlims) - # - if pflag: - ax0.tick_params(labelbottom=False, bottom=True) - plt.ylabel("Power (m$^2$/k')") - plt.legend() - else: - ax0.tick_params(labelbottom=False, bottom=True, labelleft=False) - -for pos, k, pflag in zip([gs[2:4, 0],gs[2:4, 1],gs[2:4, 2] ], low_beams, [True, False, False] ): - ax0 = F.fig.add_subplot(pos) - Gplot = Gk.sel(beam = k).isel(x = slice(0, -1)).gFT_PSD_data.squeeze().rolling(k=20, x=2, min_periods= 1, center=True).mean() - #Gplot.mean('x').plot() - - Gplot= Gplot.where(Gplot["N_per_stancil"] / Gplot["Lpoints"] >= 0.1)#.plot() - - alpha_range= iter(np.linspace(1,0, Gplot.x.data.size)) - for x in Gplot.x.data: - ialpha =next(alpha_range) - plt.loglog(Gplot.k, Gplot.sel(x=x)/Gplot.k, linewidth = 0.5, color= col.rels[k], alpha= ialpha) - ax0.axvline(k_lims.sel(x=x), linewidth= 0.4, color= 'black', zorder= 0, alpha=ialpha) - - plt.title(next(fn) + k, color= col_dict[k], loc= 'left') - plt.xlim(xlims) - plt.xlabel("observed wavenumber k' ") - - # - if pflag: - ax0.tick_params( bottom=True) - plt.ylabel("Power (m$^2$/k')") - plt.legend() - else: - ax0.tick_params(bottom=True, labelleft=False) - -F.save_light(path=plot_path, name =str(ID_name) + '_B06_atten_ov_simple') -F.save_pup(path=plot_path, name = str(ID_name) + '_B06_atten_ov_simple') - -# % -pos = gs[5:, 0:2] -ax0 = F.fig.add_subplot(pos) - -lat_str = str(np.round( Gx.isel(x = 0).lat.mean().data, 2) ) +' to ' + str(np.round( Gx.isel(x = -1).lat.mean().data, 2) ) -plt.title(next(fn) + 'Mean Displacement Spectra\n(lat='+ lat_str +')', loc='left') - -dd = (10 * np.log( (G_gFT_smth/G_gFT_smth.k) .isel(x = slice(0, -1))))#.plot() -dd = dd.where(~np.isinf(dd), np.nan) - -## filter out segments with less then 10% of data points -dd= dd.where(G_gFT_smth["N_per_stancil_fraction"] >= 0.1)#.plot() - -dd_lims = np.round(dd.quantile(0.01).data*0.95, 0) , np.round(dd.quantile(0.95).data*1.05, 0) -plt.pcolor(dd.x/1e3, dd.k, dd, vmin=dd_lims[0], vmax= dd_lims[-1], cmap = col.white_base_blgror) -cb = plt.colorbar(orientation= 'vertical') - -cb.set_label('Power (m$^2$/k)') -plt.plot( G_gFT_smth.isel(x = slice(0, -1)).x/1e3 , G_gFT_smth.isel(x = slice(0, -1)).k_lim , color= col.black, linewidth = 1) -plt.ylabel('wavenumber k') -plt.xlabel('X (km)') - -pos = gs[6:, -1] -ax9 = F.fig.add_subplot(pos) - -plt.title('Data Coverage (%)', loc ='left') -plt.plot(G_gFT_smth.x/1e3 , G_gFT_smth["N_per_stancil_fraction"]*100 , linewidth = 0.8, color = 'black') -ax9.spines['left'].set_visible(False) -ax9.spines['right'].set_visible(True) -ax9.tick_params(labelright=True, right=True, labelleft=False, left=False) -ax9.axhline(10, linewidth = 0.8, 
linestyle= '--', color ='black') -#plt.ylabel('(%)') -plt.xlabel('X (km)') - - -F.save_light(path=plot_path, name =str(ID_name) + '_B06_atten_ov') -F.save_pup(path=plot_path, name = str(ID_name) + '_B06_atten_ov') - - -# %% reconstruct slope displacement data -def fit_offset(x, data, model, nan_mask, deg): - - #x, data, model, nan_mask, deg = dist_stencil, height_data, height_model, dist_nanmask, 1 - p_offset = np.polyfit(x[~nan_mask], data[~nan_mask] - model[~nan_mask], deg) - p_offset[-1] = 0 - poly_offset = np.polyval(p_offset,x ) - return poly_offset - -def tanh_fitler(x, x_cutoff , sigma_g= 0.01): - """ - zdgfsg - """ - - decay = 0.5 - np.tanh( (x-x_cutoff)/sigma_g )/2 - return decay - - -#plt.plot(x, tanh_fitler(Gk_1.k, k_thresh, sigma_g= 0.003) ) - - -def reconstruct_displacement(Gx_1, Gk_1, T3, k_thresh): - - """ - reconstructs photon displacement heights for each stancil given the model parameters in Gk_1 - A low-pass frequeny filter can be applied using k-thresh - - inputs: - Gk_1 model data per stencil from _gFT_k file with sin and cos coefficients - Gx_1 real data per stencil from _gFT_x file with mean photon heights and coordindate systems - T3 - k_thresh (None) threshold for low-pass filter - - returns: - height_model reconstucted displements heights of the stancil - poly_offset fitted staight line to the residual between observations and model to account for low-pass variability - nan_mask mask where is observed data in - """ - - dist_stencil = Gx_1.eta + Gx_1.x - dist_stencil_lims = dist_stencil[0].data, dist_stencil[-1].data - - gFT_cos_coeff_sel = np.copy(Gk_1.gFT_cos_coeff) - gFT_sin_coeff_sel = np.copy(Gk_1.gFT_sin_coeff) - - gFT_cos_coeff_sel = gFT_cos_coeff_sel*tanh_fitler(Gk_1.k, k_thresh, sigma_g= 0.003) - gFT_sin_coeff_sel = gFT_sin_coeff_sel*tanh_fitler(Gk_1.k, k_thresh, sigma_g= 0.003) - - # gFT_cos_coeff_sel[Gk_1.k > k_thresh] = 0 - # gFT_sin_coeff_sel[Gk_1.k > k_thresh] = 0 - - - FT_int = gFT.generalized_Fourier(Gx_1.eta + Gx_1.x, None,Gk_1.k ) - _ = FT_int.get_H() - FT_int.p_hat = np.concatenate([ -gFT_sin_coeff_sel /Gk_1.k, gFT_cos_coeff_sel/Gk_1.k ]) - - dx = Gx.eta.diff('eta').mean().data - height_model = FT_int.model() /dx# + T3_sel['heights_c_weighted_mean'].iloc[0] - - dist_nanmask = np.isnan(Gx_1.y_data) - height_data = np.interp(dist_stencil, T3_sel['dist'], T3_sel['heights_c_weighted_mean']) #[~np.isnan(Gx_1.y_data)] - #poly_offset = fit_offset(dist_stencil, height_data, height_model, dist_nanmask, 1) - - return height_model, np.nan, dist_nanmask - -# cutting Table data - - -# %% -G_height_model=dict() -k = 'gt2l' -for bb in Gx.beam.data: - G_height_model_temp= dict() - for i in np.arange(Gx.x.size): - #k_thresh= 4 - - Gx_1 = Gx.isel(x= i).sel(beam = bb) - Gk_1 = Gk.isel(x= i).sel(beam = bb) - k_thresh= G_gFT_smth.k_lim.isel(x=0).data - - - dist_stencil = Gx_1.eta + Gx_1.x - dist_stencil_lims = dist_stencil[0].data, dist_stencil[-1].data - dist_stencil_lims_plot = dist_stencil_lims#Gx_1.eta[0]*0.25 + Gx_1.x, Gx_1.eta[-1]*0.25 + Gx_1.x - dist_stencil_lims_plot = Gx_1.eta[0]*1 + Gx_1.x, Gx_1.eta[-1]*1 + Gx_1.x - - T3_sel = B3[k].loc[( (B3[k]['dist'] >= dist_stencil_lims[0]) & (B3[k]['dist'] <= dist_stencil_lims[1]) )] - #T2_sel = B2[k].loc[( B2[k]['x_true'] >= T3_sel['x_true'].min() ) & ( B2[k]['x_true'] <= T3_sel['x_true'].max() )] - - if T3_sel.shape[0] != 0: - # if T3_sel['x_true'].iloc[-1] < T3_sel['x_true'].iloc[0]: - # dist_T2_temp =np.interp(T2_sel['x_true'][::-1], T3_sel['x_true'][::-1], T3_sel['dist'][::-1] ) - # T2_sel['dist'] = 
dist_T2_temp[::-1] - # else: - # dist_T2_temp =np.interp(T2_sel['x_true'], T3_sel['x_true'], T3_sel['dist'] ) - # T2_sel['dist'] = dist_T2_temp - - height_model, poly_offset, dist_nanmask = reconstruct_displacement(Gx_1, Gk_1, T3_sel, k_thresh = k_thresh) - poly_offset = poly_offset*0 - G_height_model_temp[str(i) + bb] = xr.DataArray(height_model, coords=Gx_1.coords, dims= Gx_1.dims, name = 'height_model' ) - else: - G_height_model_temp[str(i) + bb] = xr.DataArray(Gx_1.y_model.data, coords=Gx_1.coords, dims= Gx_1.dims, name = 'height_model' ) - - #G_height_nans[i] = xr.DataArray(dist_nanmask, coords=Gx_1.coords, dims= Gx_1.dims, name = 'nanmask' ) - - # jsut for plotting: - # # corrected rar Photon heights - # T2_sel['heights_c_residual'] = photon_height_residual = T2_sel['heights_c'] - np.interp(T2_sel['dist'], dist_stencil, height_model + poly_offset) - # - # # interpolate rebinned photon heights - # heights_c_weighted_mean_stancil = np.interp(dist_stencil, T3_sel['dist'], T3_sel['heights_c_weighted_mean'] ) - # - # # corrected rebinned photon heights - # photon_height_residual_mean = heights_c_weighted_mean_stancil - (height_model + poly_offset) - # photon_height_residual_mean[dist_nanmask] = np.nan - # T3_sel['heights_c_weighted_mean_residual'] = T3_sel['heights_c_weighted_mean'] - np.interp(T3_sel['dist'], dist_stencil, height_model + poly_offset ) - - #plot - # font_for_pres() - # M.figure_axis_xy(5.5, 6, view_scale = 0.8) - # - # plt.subplot(3,1 ,1) - # plt.scatter(T2_sel['dist'], T2_sel['heights_c'], s= 1, marker='o', color='black', alpha =0.2, edgecolors= 'none' ) - # #plt.scatter(T3_sel['dist'], T3_sel['heights_c_weighted_mean'], s= 1, marker='o', color='black', alpha =0.2, edgecolors= 'none' ) - # plt.plot(T3_sel['dist'], T3_sel['heights_c_weighted_mean'] , color =col.rascade1, linewidth = 0.5, label = 'residual $h_c$') - # plt.xlim(dist_stencil_lims_plot) - # plt.ylim(0, 1.5) - # - # ax1 = plt.subplot(3,1 ,2) - # plt.plot(dist_stencil, height_model + poly_offset ,'-', c='red', linewidth=0.8, alpha=1,zorder= 12, label = 'GFT height model + correction') - # plt.plot(dist_stencil, height_model ,'-', c='orange', linewidth=0.8, alpha=0.5,zorder= 2, label = 'GFT height model') - # plt.legend(loc = 1) - # plt.xlim(dist_stencil_lims_plot) - # ax1.axhline(0, linewidth=0.5, color= 'black') - # - # plt.subplot(3,1 ,3) - # plt.scatter(T2_sel['dist'], T2_sel['heights_c_residual'], s= 1, marker='o', color='black', alpha =0.5, edgecolors= 'none', zorder=6 ) - # #plt.scatter(T2_sel['dist'], T2_sel['heights_c_residual'], s= 1, marker='o', color='black', alpha =1, edgecolors= 'none' ) - # - # plt.plot(T3_sel['dist'], T3_sel['heights_c_weighted_mean_residual'],'-', c=col.rascade2, linewidth=0.5, alpha=1, zorder= 10, label = 'GFT height model + correction') - # #plt.plot(dist_stencil, photon_height_residual_mean,'-', c='red', linewidth=0.3, alpha=1, zorder= 2, label = 'GFT height model + correction') - # plt.fill_between(dist_stencil , photon_height_residual_mean, color= col.cascade2, edgecolor = None, alpha = 1, zorder= 0) - # - # plt.xlim(dist_stencil_lims_plot) - # plt.ylim(0, 1.5) - - G_height_model[bb] = xr.concat(G_height_model_temp.values(), dim= 'x').T - -Gx['height_model'] = xr.concat(G_height_model.values(), dim= 'beam').transpose('eta', 'beam', 'x') - -# %% -Gx_v2, B2_v2, B3_v2 = dict(), dict(), dict() -for bb in Gx.beam.data: - print(bb) - Gx_k = Gx.sel(beam = bb) - #Gx_k['height_model'] = xr.concat(G_height_model.values(), dim= 'x').T#.plot() - Gh = Gx['height_model'].sel(beam = bb).T 
- Gh_err = Gx_k['model_error_x'].T - Gnans = np.isnan(Gx_k.y_model) - - concented_heights = Gh.data.reshape(Gh.data.size) - concented_err = Gh_err.data.reshape(Gh.data.size) - concented_nans = Gnans.data.reshape(Gnans.data.size) - concented_x = (Gh.x+Gh.eta).data.reshape(Gh.data.size) - - dx = Gh.eta.diff('eta')[0].data - continous_x_grid = np.arange(concented_x.min(), concented_x.max(), dx) - continous_height_model = np.interp(continous_x_grid, concented_x, concented_heights ) - concented_err = np.interp(continous_x_grid, concented_x, concented_err ) - continous_nans = np.interp(continous_x_grid, concented_x, concented_nans ) ==1 - - T3 = B3[bb]#.loc[( (B3[k]['dist'] >= dist_stencil_lims[0]) & (B3[k]['dist'] <= dist_stencil_lims[1]) )] - #T2 = B2[bb]#.loc[( B2[k]['x_true'] >= T3_sel['x_true'].min() ) & ( B2[k]['x_true'] <= T3_sel['x_true'].max() )] - - #T2 = T2.sort_values('x_true') - T3 = T3.sort_values('x') - #T2['dist'] = np.interp(T2['x_true'], T3['x_true'], T3['dist'] ) - #T2 = T2.sort_values('dist') - T3 = T3.sort_values('dist') - - #T2 = T2.sort_index() - #T2['dist'] = np.interp(T2['x_true'], T3['x_true'], T3['dist'] ) - - T3['heights_c_model'] = np.interp(T3['dist'], continous_x_grid, continous_height_model) - T3['heights_c_model_err'] = np.interp(T3['dist'], continous_x_grid, concented_err) - T3['heights_c_residual'] = T3['heights_c_weighted_mean'] - T3['heights_c_model'] - - #T2['heights_c_model'] = np.interp(T2['dist'], continous_x_grid, continous_height_model) - #T2['heights_c_residual'] = T2['heights_c'] - T2['heights_c_model'] - - - #B2_v2[bb] = T2 - B3_v2[bb] = T3 - Gx_v2[bb] = Gx_k - - # font_for_print() - # F = M.figure_axis_xy(6, 2, view_scale= 0.7) - # - # plt.plot(T2['dist'] , T2['heights_c']+2,'ok', markersize=0.8, alpha=0.5, label='org photon height_c') - # plt.plot(T3['dist'] , T3['heights_c_weighted_mean']+2,'.r', markersize=1, alpha=0.5, label='org photon wmean') - # - # plt.plot(T2['dist'] , T2['heights_c_model'], '.', markersize=1, alpha=0.8, label='height model', color=col.orange, zorder= 12) - # F.ax.axhline(2, linewidth = .7, color= 'black') - # F.ax.axhline(0, linewidth = .7, color= 'black') - # F.ax.axhline(-2, linewidth = .7, color= 'black') - # - # plt.plot(T2['dist'] , T2['heights_c_residual']-2,'ob', markersize=0.5, alpha=0.5, label='residual photons') - # plt.plot(T3['dist'], T3['heights_c_residual']-2 , 'r', linewidth= 0.8, zorder=12, label='photon height_c resodual') - # - # xlims = np.nanmean(T2['dist']), np.nanmean(T2['dist'])+7e3 - # plt.xlim(xlims) - # dlim = np.nanmax(T3['heights_c_residual'][(T3['dist']> xlims[0]) & (T3['dist'] < xlims[1])]) - # #plt.ylim(-dlim*1.5, dlim*1.5) - # try: - # plt.ylim((-2-1.5*dlim), 2+1.5*dlim) - # except: - # plt.ylim(-5, 5) - # plt.legend( ncol= 4) - #F.save_light(path = plot_path , name = 'B06_'+bb+'__check') - - -# %% correct wave incident direction - -#load_path = mconfig['paths']['work'] + '/B04_angle/' - -try: - G_angle = xr.open_dataset(load_path_angle+ '/B05_'+ID_name + '_angle_pdf.nc' ) - - font_for_pres() - - Ga_abs = (G_angle.weighted_angle_PDF_smth.isel(angle = G_angle.angle > 0).data + G_angle.weighted_angle_PDF_smth.isel(angle = G_angle.angle < 0).data[:,::-1])/2 - Ga_abs = xr.DataArray(data=Ga_abs.T, dims = G_angle.dims, coords=G_angle.isel(angle = G_angle.angle > 0).coords) - - Ga_abs_front = Ga_abs.isel(x= slice(0, 3)) - Ga_best = (( Ga_abs_front * Ga_abs_front.N_data ).sum('x')/Ga_abs_front.N_data.sum('x')) - - theta = Ga_best.angle[Ga_best.argmax()].data - theta_flag = True - - font_for_print() 
- F = M.figure_axis_xy(3, 5, view_scale= 0.7) - - plt.subplot(2, 1, 1) - plt.pcolor(Ga_abs) - plt.xlabel('abs angle') - plt.ylabel('x') - - ax = plt.subplot(2, 1, 2) - Ga_best.plot() - plt.title('angle front ' + str(theta*180/np.pi), loc='left') - ax.axvline(theta, color= 'red') - F.save_light(path = plot_path , name = 'B06_angle_def') -except: - - print('no angle data found, skip angle corretion') - theta= 0 - theta_flag = False - -# %% -lam_p = 2 *np.pi/Gk.k -lam = lam_p * np.cos(theta) - -if theta_flag: - k_corrected = 2 * np.pi/lam - x_corrected = Gk.x * np.cos(theta) -else: - k_corrected = 2 * np.pi/lam *np.nan - x_corrected = Gk.x * np.cos(theta) *np.nan - -# %% spectral save -G5 = G_gFT_wmean.expand_dims(dim = 'beam', axis = 1) -G5.coords['beam'] = ['weighted_mean']#(('beam'), 'weighted_mean') -G5 = G5.assign_coords(N_photons= G5.N_photons) -G5['N_photons'] = G5['N_photons'].expand_dims('beam') -G5['N_per_stancil_fraction'] = G5['N_per_stancil_fraction'].expand_dims('beam') - -Gk_v2 = xr.merge([Gk, G5]) - -Gk_v2 = Gk_v2.assign_coords(x_corrected=("x", x_corrected.data)).assign_coords(k_corrected=("k", k_corrected.data)) - -Gk_v2.attrs['best_guess_incident_angle'] = theta - -# save collected spectral data -Gk_v2.to_netcdf(save_path+'/B06_'+ID_name + '_gFT_k_corrected.nc' ) -Gx -# %% save real space data -Gx.to_netcdf(save_path+'/B06_'+ID_name + '_gFT_x_corrected.nc' ) -try: - io.save_pandas_table(B2_v2, 'B06_' +ID_name + '_B06_corrected_resid' , save_path) # all photos but heights adjusted and with distance coordinate -except: - os.remove(save_path+'B06_' +ID_name + '_B06_corrected_resid.h5') - io.save_pandas_table(B2_v2, 'B06_' +ID_name + '_B06_corrected_resid' , save_path) # all photos but heights adjusted and with distance coordinate - -try: - io.save_pandas_table(B3_v2, 'B06_' +ID_name + '_binned_resid' , save_path) # regridding heights -except: - os.remove(save_path+'B06_' +ID_name + '_binned_resid.h5') - io.save_pandas_table(B3_v2, 'B06_' +ID_name + '_binned_resid' , save_path) # regridding heights - -MT.json_save('B06_success', plot_path + '../', {'time':time.asctime( time.localtime(time.time()) )}) -print('done. saved target at ' + plot_path + '../B06_success' ) - -# %% \ No newline at end of file diff --git a/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py b/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py new file mode 100644 index 00000000..e69de29b From 5b4e835a1a830742f8928b8f92121d8b0d897eef Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Thu, 1 Feb 2024 08:50:38 -0500 Subject: [PATCH 18/30] adding 7th step to workflow. 
formatting and cleaning B06_correct_separate_var.py file --- .../test-B01_SL_load_single_file.yml | 2 + .../analysis_db/B06_correct_separate_var.py | 773 ++++++++++++++++++ 2 files changed, 775 insertions(+) diff --git a/.github/workflows/test-B01_SL_load_single_file.yml b/.github/workflows/test-B01_SL_load_single_file.yml index 54fc432f..790fdc01 100644 --- a/.github/workflows/test-B01_SL_load_single_file.yml +++ b/.github/workflows/test-B01_SL_load_single_file.yml @@ -35,3 +35,5 @@ jobs: run: python src/icesat2_tracks/analysis_db/B04_angle.py SH_20190502_05180312 SH_testSLsinglefile2 True - name: Sixth step B05_define_angle run: python src/icesat2_tracks/analysis_db/B05_define_angle.py SH_20190502_05180312 SH_testSLsinglefile2 True + - name: Seventh step B06_correct_separate_var + run: python src/icesat2_tracks/analysis_db/B06_correct_separate_var.py SH_20190502_05180312 SH_testSLsinglefile2 True diff --git a/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py b/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py index e69de29b..0289c9c4 100644 --- a/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py +++ b/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py @@ -0,0 +1,773 @@ +""" +This file open a ICEsat2 track applied filters and corections and returns smoothed photon heights on a regular grid in an .nc file. +This is python 3 +""" +import os, sys + +from icesat2_tracks.config.IceSAT2_startup import ( + mconfig, + color_schemes, + font_for_pres, + font_for_print, + plt, + lstrings, + fig_sizes, +) + +import h5py +import icesat2_tracks.ICEsat2_SI_tools.io as io +import icesat2_tracks.local_modules.m_tools_ph3 as MT +from icesat2_tracks.local_modules import m_general_ph3 as M +import time +import copy +import icesat2_tracks.ICEsat2_SI_tools.generalized_FT as gFT +from scipy.ndimage.measurements import label +import pandas as pd +import xarray as xr +import numpy as np +from matplotlib.gridspec import GridSpec + +xr.set_options(display_style="text") +ID_name, batch_key, test_flag = io.init_from_input(sys.argv) +hemis, batch = batch_key.split("_") + +all_beams = mconfig["beams"]["all_beams"] +high_beams = mconfig["beams"]["high_beams"] +low_beams = mconfig["beams"]["low_beams"] + +load_path_work = mconfig["paths"]["work"] + "/" + batch_key + "/" +B3_hdf5 = h5py.File( + load_path_work + "B01_regrid" + "/" + ID_name + "_B01_binned.h5", "r" +) + + +load_path_angle = mconfig["paths"]["work"] + "/" + batch_key + "/B04_angle/" + +B3 = dict() +for b in all_beams: + B3[b] = io.get_beam_hdf_store(B3_hdf5[b]) + +B3_hdf5.close() + +load_file = load_path_work + "/B02_spectra/" + "B02_" + ID_name # + '.nc' +Gk = xr.open_dataset(load_file + "_gFT_k.nc") +Gx = xr.open_dataset(load_file + "_gFT_x.nc") +Gfft = xr.open_dataset(load_file + "_FFT.nc") + +plot_path = ( + mconfig["paths"]["plot"] + + "/" + + hemis + + "/" + + batch_key + + "/" + + ID_name + + "/B06_correction/" +) +MT.mkdirs_r(plot_path) + +save_path = mconfig["paths"]["work"] + batch_key + "/B06_corrected_separated/" +MT.mkdirs_r(save_path) + + +color_schemes.colormaps2(31, gamma=1) +col_dict = color_schemes.rels + + +def dict_weighted_mean(Gdict, weight_key): + """ + returns the weighted meean of a dict of xarray, data_arrays + weight_key must be in the xr.DataArrays + """ + + akey = list(Gdict.keys())[0] + GSUM = Gdict[akey].copy() + GSUM.data = np.zeros(GSUM.shape) + N_per_stancil = GSUM.N_per_stancil * 0 + N_photons = np.zeros(GSUM.N_per_stancil.size) + + counter = 0 + for k, I in Gdict.items(): + I = 
I.squeeze() + print(len(I.x)) + if len(I.x) != 0: + GSUM += I.where(~np.isnan(I), 0) * I[weight_key] + N_per_stancil += I[weight_key] + if "N_photons" in GSUM.coords: + N_photons += I["N_photons"] + counter += 1 + + GSUM = GSUM / N_per_stancil + + if "N_photons" in GSUM.coords: + GSUM.coords["N_photons"] = (("x", "beam"), np.expand_dims(N_photons, 1)) + + GSUM["beam"] = ["weighted_mean"] + GSUM.name = "power_spec" + + return GSUM + + +G_gFT_wmean = (Gk.where(~np.isnan(Gk["gFT_PSD_data"]), 0) * Gk["N_per_stancil"]).sum( + "beam" +) / Gk["N_per_stancil"].sum("beam") +G_gFT_wmean["N_photons"] = Gk["N_photons"].sum("beam") + +G_fft_wmean = (Gfft.where(~np.isnan(Gfft), 0) * Gfft["N_per_stancil"]).sum( + "beam" +) / Gfft["N_per_stancil"].sum("beam") +G_fft_wmean["N_per_stancil"] = Gfft["N_per_stancil"].sum("beam") + + +# plot +# derive spectral errors: +Lpoints = Gk.Lpoints.mean("beam").data +N_per_stancil = Gk.N_per_stancil.mean("beam").data # [0:-2] + +G_error_model = dict() +G_error_data = dict() + +for bb in Gk.beam.data: + I = Gk.sel(beam=bb) + b_bat_error = np.concatenate([I.model_error_k_cos.data, I.model_error_k_sin.data]) + Z_error = gFT.complex_represenation(b_bat_error, Gk.k.size, Lpoints) + PSD_error_data, PSD_error_model = gFT.Z_to_power_gFT( + Z_error, np.diff(Gk.k)[0], N_per_stancil, Lpoints + ) + + G_error_model[bb] = xr.DataArray( + data=PSD_error_model, + coords=I.drop("N_per_stancil").coords, + name="gFT_PSD_data_error", + ).expand_dims("beam") + G_error_data[bb] = xr.DataArray( + data=PSD_error_data, + coords=I.drop("N_per_stancil").coords, + name="gFT_PSD_data_error", + ).expand_dims("beam") + +gFT_PSD_data_error_mean = xr.concat(G_error_model.values(), dim="beam") +gFT_PSD_data_error_mean = xr.concat(G_error_data.values(), dim="beam") + +gFT_PSD_data_error_mean = ( + gFT_PSD_data_error_mean.where(~np.isnan(gFT_PSD_data_error_mean), 0) + * Gk["N_per_stancil"] +).sum("beam") / Gk["N_per_stancil"].sum("beam") +gFT_PSD_data_error_mean = ( + gFT_PSD_data_error_mean.where(~np.isnan(gFT_PSD_data_error_mean), 0) + * Gk["N_per_stancil"] +).sum("beam") / Gk["N_per_stancil"].sum("beam") + +G_gFT_wmean["gFT_PSD_data_err"] = gFT_PSD_data_error_mean +G_gFT_wmean["gFT_PSD_data_err"] = gFT_PSD_data_error_mean + +Gk["gFT_PSD_data_err"] = xr.concat(G_error_model.values(), dim="beam") +Gk["gFT_PSD_data_err"] = xr.concat(G_error_data.values(), dim="beam") + + +# + +G_gFT_smth = ( + G_gFT_wmean["gFT_PSD_data"].rolling(k=30, center=True, min_periods=1).mean() +) +G_gFT_smth["N_photons"] = G_gFT_wmean.N_photons +G_gFT_smth["N_per_stancil_fraction"] = Gk["N_per_stancil"].T.mean( + "beam" +) / Gk.Lpoints.mean("beam") + +k = G_gFT_smth.k + +F = M.figure_axis_xy() + +plt.loglog(k, G_gFT_smth / k) + +plt.title("displacement power Spectra", loc="left") + + +def define_noise_wavenumber_tresh_simple( + data_xr, k_peak, k_end_lim=None, plot_flag=False +): + """ + returns noise wavenumber on the high end of a spectral peak. This method fits a straight line in loglog speace using robust regression. + The noise level is defined as the wavenumber at which the residual error of a linear fit to the data is minimal. 
+ + inputs: + data_xr xarray.Dataarray with the power spectra with k as dimension + k_peak wavenumber above which the searh should start + dk the intervall over which the regrssion is repeated + + returns: + k_end the wavenumber at which the spectrum flattens + m slope of the fitted line + b intersect of the fitted line + """ + from scipy.ndimage.measurements import label + + if k_end_lim is None: + k_end_lim = data_xr.k[-1] + + k_lead_peak_margin = k_peak * 1.05 + try: + data_log = ( + np.log(data_xr) + .isel(k=(data_xr.k > k_lead_peak_margin)) + .rolling(k=10, center=True, min_periods=1) + .mean() + ) + + except: + data_log = ( + np.log(data_xr) + .isel(k=(data_xr.k > k_lead_peak_margin / 2)) + .rolling(k=10, center=True, min_periods=1) + .mean() + ) + + k_log = np.log(data_log.k) + try: + d_grad = ( + data_log.differentiate("k").rolling(k=40, center=True, min_periods=4).mean() + ) + except: + d_grad = ( + data_log.differentiate("k").rolling(k=20, center=True, min_periods=2).mean() + ) + ll = label(d_grad >= -5) + + if ll[0][0] != 0: + print("no decay, set to peak") + return k_peak + + if sum(ll[0]) == 0: + k_end = d_grad.k[-1] + else: + k_end = d_grad.k[(ll[0] == 1)][0].data + + if plot_flag: + plt.plot(np.log(data_xr.k), np.log(data_xr)) + plt.plot(k_log, data_log) + plt.plot([np.log(k_end), np.log(k_end)], [-6, -5]) + return k_end + + +# new version +def get_correct_breakpoint(pw_results): + br_points = list() + for i in pw_results.keys(): + [br_points.append(i) if "breakpoint" in i else None] + br_points_df = pw_results[br_points] + br_points_sorted = br_points_df.sort_values() + + alphas_sorted = [ + i.replace("breakpoint", "alpha") for i in br_points_df.sort_values().index + ] + alphas_sorted.append("alpha" + str(len(alphas_sorted) + 1)) + + betas_sorted = [ + i.replace("breakpoint", "beta") for i in br_points_df.sort_values().index + ] + + # betas_sorted + alphas_v2 = list() + alpha_i = pw_results["alpha1"] + for i in [0] + list(pw_results[betas_sorted]): + alpha_i += i + alphas_v2.append(alpha_i) + + alphas_v2_sorted = pd.Series(index=alphas_sorted, data=alphas_v2) + br_points_sorted["breakpoint" + str(br_points_sorted.size + 1)] = "end" + + print("all alphas") + print(alphas_v2_sorted) + slope_mask = alphas_v2_sorted < 0 + + if sum(slope_mask) == 0: + print("no negative slope found, set to lowest") + breakpoint = "start" + else: + # take steepest slope + alpah_v2_sub = alphas_v2_sorted[slope_mask] + print(alpah_v2_sub) + print(alpah_v2_sub.argmin()) + break_point_name = alpah_v2_sub.index[alpah_v2_sub.argmin()].replace( + "alpha", "breakpoint" + ) + + # take first slope + breakpoint = br_points_sorted[break_point_name] + + return breakpoint + + +def get_breakingpoints(xx, dd): + import piecewise_regression + + x2, y2 = xx, dd + convergence_flag = True + n_breakpoints = 3 + while convergence_flag: + pw_fit = piecewise_regression.Fit(x2, y2, n_breakpoints=n_breakpoints) + print("n_breakpoints", n_breakpoints, pw_fit.get_results()["converged"]) + convergence_flag = not pw_fit.get_results()["converged"] + n_breakpoints += 1 + if n_breakpoints >= 4: + convergence_flag = False + + pw_results = pw_fit.get_results() + + if pw_results["converged"]: + pw_results_df = pd.DataFrame(pw_results["estimates"]).loc["estimate"] + + breakpoint = get_correct_breakpoint(pw_results_df) + + return pw_fit, breakpoint + + else: + return pw_fit, False + + +def define_noise_wavenumber_piecewise(data_xr, plot_flag=False): + data_log = data_xr + data_log = np.log(data_xr) + + k = data_log.k.data + k_log = 
np.log(k) + + pw_fit, breakpoint_log = get_breakingpoints(k_log, data_log.data) + + if breakpoint_log is "start": + print("no decay, set to lowerst wavenumber") + breakpoint_log = k_log[0] + if (breakpoint_log is "end") | (breakpoint_log is False): + print("higest wavenumner") + breakpoint_log = k_log[-1] + + breakpoint_pos = abs(k_log - breakpoint_log).argmin() + breakpoint_k = k[breakpoint_pos] + + if plot_flag: + pw_fit.plot() + plt.plot(k_log, data_log) + + return breakpoint_k, pw_fit + + +k_lim_list = list() +k_end_previous = np.nan +x = G_gFT_smth.x.data[0] +k = G_gFT_smth.k.data + +for x in G_gFT_smth.x.data: + print(x) + # use displacement power spectrum + k_end, pw_fit = define_noise_wavenumber_piecewise( + G_gFT_smth.sel(x=x) / k, plot_flag=False + ) + + k_save = k_end_previous if k_end == k[0] else k_end + k_end_previous = k_save + k_lim_list.append(k_save) + print("--------------------------") + +font_for_pres() +G_gFT_smth.coords["k_lim"] = ("x", k_lim_list) +G_gFT_smth.k_lim.plot() +k_lim_smth = G_gFT_smth.k_lim.rolling(x=3, center=True, min_periods=1).mean() +k_lim_smth.plot(c="r") + +plt.title("k_c filter", loc="left") +F.save_light(path=plot_path, name=str(ID_name) + "_B06_atten_ov") + +G_gFT_smth["k_lim"] = k_lim_smth +G_gFT_wmean.coords["k_lim"] = k_lim_smth + +font_for_print() + +fn = copy.copy(lstrings) +F = M.figure_axis_xy( + fig_sizes["two_column"][0], + fig_sizes["two_column"][0] * 0.9, + container=True, + view_scale=1, +) + + +plt.suptitle( + "Cut-off Frequency for Displacement Spectral\n" + io.ID_to_str(ID_name), y=0.97 +) +gs = GridSpec(8, 3, wspace=0.1, hspace=1.5) + +k_lims = G_gFT_wmean.k_lim +xlims = G_gFT_wmean.k[0], G_gFT_wmean.k[-1] +# +k = high_beams[0] +for pos, k, pflag in zip( + [gs[0:2, 0], gs[0:2, 1], gs[0:2, 2]], high_beams, [True, False, False] +): + ax0 = F.fig.add_subplot(pos) + Gplot = ( + Gk.sel(beam=k) + .isel(x=slice(0, -1)) + .gFT_PSD_data.squeeze() + .rolling(k=20, x=2, min_periods=1, center=True) + .mean() + ) + Gplot = Gplot.where(Gplot["N_per_stancil"] / Gplot["Lpoints"] >= 0.1) + alpha_range = iter(np.linspace(1, 0, Gplot.x.data.size)) + for x in Gplot.x.data: + ialpha = next(alpha_range) + plt.loglog( + Gplot.k, + Gplot.sel(x=x) / Gplot.k, + linewidth=0.5, + color=color_schemes.rels[k], + alpha=ialpha, + ) + ax0.axvline( + k_lims.sel(x=x), linewidth=0.4, color="black", zorder=0, alpha=ialpha + ) + + plt.title(next(fn) + k, color=col_dict[k], loc="left") + plt.xlim(xlims) + # + if pflag: + ax0.tick_params(labelbottom=False, bottom=True) + plt.ylabel("Power (m$^2$/k')") + plt.legend() + else: + ax0.tick_params(labelbottom=False, bottom=True, labelleft=False) + +for pos, k, pflag in zip( + [gs[2:4, 0], gs[2:4, 1], gs[2:4, 2]], low_beams, [True, False, False] +): + ax0 = F.fig.add_subplot(pos) + Gplot = ( + Gk.sel(beam=k) + .isel(x=slice(0, -1)) + .gFT_PSD_data.squeeze() + .rolling(k=20, x=2, min_periods=1, center=True) + .mean() + ) + + Gplot = Gplot.where(Gplot["N_per_stancil"] / Gplot["Lpoints"] >= 0.1) + + alpha_range = iter(np.linspace(1, 0, Gplot.x.data.size)) + for x in Gplot.x.data: + ialpha = next(alpha_range) + plt.loglog( + Gplot.k, + Gplot.sel(x=x) / Gplot.k, + linewidth=0.5, + color=color_schemes.rels[k], + alpha=ialpha, + ) + ax0.axvline( + k_lims.sel(x=x), linewidth=0.4, color="black", zorder=0, alpha=ialpha + ) + + plt.title(next(fn) + k, color=col_dict[k], loc="left") + plt.xlim(xlims) + plt.xlabel("observed wavenumber k' ") + + if pflag: + ax0.tick_params(bottom=True) + plt.ylabel("Power (m$^2$/k')") + plt.legend() + 
else: + ax0.tick_params(bottom=True, labelleft=False) + +F.save_light(path=plot_path, name=str(ID_name) + "_B06_atten_ov_simple") +F.save_pup(path=plot_path, name=str(ID_name) + "_B06_atten_ov_simple") + +pos = gs[5:, 0:2] +ax0 = F.fig.add_subplot(pos) + +lat_str = ( + str(np.round(Gx.isel(x=0).lat.mean().data, 2)) + + " to " + + str(np.round(Gx.isel(x=-1).lat.mean().data, 2)) +) +plt.title(next(fn) + "Mean Displacement Spectra\n(lat=" + lat_str + ")", loc="left") + +dd = 10 * np.log((G_gFT_smth / G_gFT_smth.k).isel(x=slice(0, -1))) +dd = dd.where(~np.isinf(dd), np.nan) + +## filter out segments with less then 10% of data points +dd = dd.where(G_gFT_smth["N_per_stancil_fraction"] >= 0.1) + +dd_lims = np.round(dd.quantile(0.01).data * 0.95, 0), np.round( + dd.quantile(0.95).data * 1.05, 0 +) +plt.pcolor( + dd.x / 1e3, + dd.k, + dd, + vmin=dd_lims[0], + vmax=dd_lims[-1], + cmap=color_schemes.white_base_blgror, +) +cb = plt.colorbar(orientation="vertical") + +cb.set_label("Power (m$^2$/k)") +plt.plot( + G_gFT_smth.isel(x=slice(0, -1)).x / 1e3, + G_gFT_smth.isel(x=slice(0, -1)).k_lim, + color=color_schemes.black, + linewidth=1, +) +plt.ylabel("wavenumber k") +plt.xlabel("X (km)") + +pos = gs[6:, -1] +ax9 = F.fig.add_subplot(pos) + +plt.title("Data Coverage (%)", loc="left") +plt.plot( + G_gFT_smth.x / 1e3, + G_gFT_smth["N_per_stancil_fraction"] * 100, + linewidth=0.8, + color="black", +) +ax9.spines["left"].set_visible(False) +ax9.spines["right"].set_visible(True) +ax9.tick_params(labelright=True, right=True, labelleft=False, left=False) +ax9.axhline(10, linewidth=0.8, linestyle="--", color="black") +plt.xlabel("X (km)") + + +F.save_light(path=plot_path, name=str(ID_name) + "_B06_atten_ov") +F.save_pup(path=plot_path, name=str(ID_name) + "_B06_atten_ov") + + +# reconstruct slope displacement data +def fit_offset(x, data, model, nan_mask, deg): + p_offset = np.polyfit(x[~nan_mask], data[~nan_mask] - model[~nan_mask], deg) + p_offset[-1] = 0 + poly_offset = np.polyval(p_offset, x) + return poly_offset + + +def tanh_fitler(x, x_cutoff, sigma_g=0.01): + """ + zdgfsg + """ + + decay = 0.5 - np.tanh((x - x_cutoff) / sigma_g) / 2 + return decay + + +def reconstruct_displacement(Gx_1, Gk_1, T3, k_thresh): + """ + reconstructs photon displacement heights for each stancil given the model parameters in Gk_1 + A low-pass frequeny filter can be applied using k-thresh + + inputs: + Gk_1 model data per stencil from _gFT_k file with sin and cos coefficients + Gx_1 real data per stencil from _gFT_x file with mean photon heights and coordindate systems + T3 + k_thresh (None) threshold for low-pass filter + + returns: + height_model reconstucted displements heights of the stancil + poly_offset fitted staight line to the residual between observations and model to account for low-pass variability + nan_mask mask where is observed data in + """ + + dist_stencil = Gx_1.eta + Gx_1.x + dist_stencil_lims = dist_stencil[0].data, dist_stencil[-1].data + + gFT_cos_coeff_sel = np.copy(Gk_1.gFT_cos_coeff) + gFT_sin_coeff_sel = np.copy(Gk_1.gFT_sin_coeff) + + gFT_cos_coeff_sel = gFT_cos_coeff_sel * tanh_fitler(Gk_1.k, k_thresh, sigma_g=0.003) + gFT_sin_coeff_sel = gFT_sin_coeff_sel * tanh_fitler(Gk_1.k, k_thresh, sigma_g=0.003) + + FT_int = gFT.generalized_Fourier(Gx_1.eta + Gx_1.x, None, Gk_1.k) + _ = FT_int.get_H() + FT_int.p_hat = np.concatenate( + [-gFT_sin_coeff_sel / Gk_1.k, gFT_cos_coeff_sel / Gk_1.k] + ) + + dx = Gx.eta.diff("eta").mean().data + height_model = FT_int.model() / dx + dist_nanmask = 
np.isnan(Gx_1.y_data) + height_data = np.interp( + dist_stencil, T3_sel["dist"], T3_sel["heights_c_weighted_mean"] + ) + return height_model, np.nan, dist_nanmask + + +# cutting Table data +G_height_model = dict() +k = "gt2l" +for bb in Gx.beam.data: + G_height_model_temp = dict() + for i in np.arange(Gx.x.size): + Gx_1 = Gx.isel(x=i).sel(beam=bb) + Gk_1 = Gk.isel(x=i).sel(beam=bb) + k_thresh = G_gFT_smth.k_lim.isel(x=0).data + + dist_stencil = Gx_1.eta + Gx_1.x + dist_stencil_lims = dist_stencil[0].data, dist_stencil[-1].data + dist_stencil_lims_plot = dist_stencil_lims + dist_stencil_lims_plot = Gx_1.eta[0] * 1 + Gx_1.x, Gx_1.eta[-1] * 1 + Gx_1.x + + T3_sel = B3[k].loc[ + ( + (B3[k]["dist"] >= dist_stencil_lims[0]) + & (B3[k]["dist"] <= dist_stencil_lims[1]) + ) + ] + + if T3_sel.shape[0] != 0: + height_model, poly_offset, dist_nanmask = reconstruct_displacement( + Gx_1, Gk_1, T3_sel, k_thresh=k_thresh + ) + poly_offset = poly_offset * 0 + G_height_model_temp[str(i) + bb] = xr.DataArray( + height_model, coords=Gx_1.coords, dims=Gx_1.dims, name="height_model" + ) + else: + G_height_model_temp[str(i) + bb] = xr.DataArray( + Gx_1.y_model.data, + coords=Gx_1.coords, + dims=Gx_1.dims, + name="height_model", + ) + + G_height_model[bb] = xr.concat(G_height_model_temp.values(), dim="x").T + +Gx["height_model"] = xr.concat(G_height_model.values(), dim="beam").transpose( + "eta", "beam", "x" +) + +Gx_v2, B2_v2, B3_v2 = dict(), dict(), dict() +for bb in Gx.beam.data: + print(bb) + Gx_k = Gx.sel(beam=bb) + Gh = Gx["height_model"].sel(beam=bb).T + Gh_err = Gx_k["model_error_x"].T + Gnans = np.isnan(Gx_k.y_model) + + concented_heights = Gh.data.reshape(Gh.data.size) + concented_err = Gh_err.data.reshape(Gh.data.size) + concented_nans = Gnans.data.reshape(Gnans.data.size) + concented_x = (Gh.x + Gh.eta).data.reshape(Gh.data.size) + + dx = Gh.eta.diff("eta")[0].data + continous_x_grid = np.arange(concented_x.min(), concented_x.max(), dx) + continous_height_model = np.interp(continous_x_grid, concented_x, concented_heights) + concented_err = np.interp(continous_x_grid, concented_x, concented_err) + continous_nans = np.interp(continous_x_grid, concented_x, concented_nans) == 1 + + T3 = B3[bb] + T3 = T3.sort_values("x") + T3 = T3.sort_values("dist") + + T3["heights_c_model"] = np.interp( + T3["dist"], continous_x_grid, continous_height_model + ) + T3["heights_c_model_err"] = np.interp(T3["dist"], continous_x_grid, concented_err) + T3["heights_c_residual"] = T3["heights_c_weighted_mean"] - T3["heights_c_model"] + + B3_v2[bb] = T3 + Gx_v2[bb] = Gx_k + +try: + G_angle = xr.open_dataset(load_path_angle + "/B05_" + ID_name + "_angle_pdf.nc") + + font_for_pres() + + Ga_abs = ( + G_angle.weighted_angle_PDF_smth.isel(angle=G_angle.angle > 0).data + + G_angle.weighted_angle_PDF_smth.isel(angle=G_angle.angle < 0).data[:, ::-1] + ) / 2 + Ga_abs = xr.DataArray( + data=Ga_abs.T, + dims=G_angle.dims, + coords=G_angle.isel(angle=G_angle.angle > 0).coords, + ) + + Ga_abs_front = Ga_abs.isel(x=slice(0, 3)) + Ga_best = (Ga_abs_front * Ga_abs_front.N_data).sum("x") / Ga_abs_front.N_data.sum( + "x" + ) + + theta = Ga_best.angle[Ga_best.argmax()].data + theta_flag = True + + font_for_print() + F = M.figure_axis_xy(3, 5, view_scale=0.7) + + plt.subplot(2, 1, 1) + plt.pcolor(Ga_abs) + plt.xlabel("abs angle") + plt.ylabel("x") + + ax = plt.subplot(2, 1, 2) + Ga_best.plot() + plt.title("angle front " + str(theta * 180 / np.pi), loc="left") + ax.axvline(theta, color="red") + F.save_light(path=plot_path, name="B06_angle_def") 
+except: + print("no angle data found, skip angle corretion") + theta = 0 + theta_flag = False + +# %% +lam_p = 2 * np.pi / Gk.k +lam = lam_p * np.cos(theta) + +if theta_flag: + k_corrected = 2 * np.pi / lam + x_corrected = Gk.x * np.cos(theta) +else: + k_corrected = 2 * np.pi / lam * np.nan + x_corrected = Gk.x * np.cos(theta) * np.nan + +# spectral save +G5 = G_gFT_wmean.expand_dims(dim="beam", axis=1) +G5.coords["beam"] = ["weighted_mean"] +G5 = G5.assign_coords(N_photons=G5.N_photons) +G5["N_photons"] = G5["N_photons"].expand_dims("beam") +G5["N_per_stancil_fraction"] = G5["N_per_stancil_fraction"].expand_dims("beam") + +Gk_v2 = xr.merge([Gk, G5]) + +Gk_v2 = Gk_v2.assign_coords(x_corrected=("x", x_corrected.data)).assign_coords( + k_corrected=("k", k_corrected.data) +) + +Gk_v2.attrs["best_guess_incident_angle"] = theta + +# save collected spectral data +Gk_v2.to_netcdf(save_path + "/B06_" + ID_name + "_gFT_k_corrected.nc") +Gx +# save real space data +Gx.to_netcdf(save_path + "/B06_" + ID_name + "_gFT_x_corrected.nc") +try: + io.save_pandas_table( + B2_v2, "B06_" + ID_name + "_B06_corrected_resid", save_path + ) # all photos but heights adjusted and with distance coordinate +except: + os.remove(save_path + "B06_" + ID_name + "_B06_corrected_resid.h5") + io.save_pandas_table( + B2_v2, "B06_" + ID_name + "_B06_corrected_resid", save_path + ) # all photos but heights adjusted and with distance coordinate + +try: + io.save_pandas_table( + B3_v2, "B06_" + ID_name + "_binned_resid", save_path + ) # regridding heights +except: + os.remove(save_path + "B06_" + ID_name + "_binned_resid.h5") + io.save_pandas_table( + B3_v2, "B06_" + ID_name + "_binned_resid", save_path + ) # regridding heights + +MT.json_save( + "B06_success", + plot_path + "../", + {"time": time.asctime(time.localtime(time.time()))}, +) +print("done. 
saved target at " + plot_path + "../B06_success") From e11ddfe11a19d52b1ff1356672894e6dc5ab1c9f Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Thu, 1 Feb 2024 11:04:45 -0500 Subject: [PATCH 19/30] explicitly declaring matplot-lib import in script --- src/icesat2_tracks/analysis_db/B06_correct_separate_var.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py b/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py index 0289c9c4..ab2420f1 100644 --- a/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py +++ b/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py @@ -9,9 +9,8 @@ color_schemes, font_for_pres, font_for_print, - plt, lstrings, - fig_sizes, + fig_sizes ) import h5py @@ -21,10 +20,11 @@ import time import copy import icesat2_tracks.ICEsat2_SI_tools.generalized_FT as gFT -from scipy.ndimage.measurements import label +from scipy.ndimage import label import pandas as pd import xarray as xr import numpy as np +import matplotlib.pyplot as plt from matplotlib.gridspec import GridSpec xr.set_options(display_style="text") From 81b741867c64d7324a3993243a13de5f646a012d Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Thu, 1 Feb 2024 13:13:44 -0500 Subject: [PATCH 20/30] applied review comments suggestions --- .../analysis_db/B06_correct_separate_var.py | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py b/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py index ab2420f1..8c2962bc 100644 --- a/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py +++ b/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py @@ -87,7 +87,7 @@ def dict_weighted_mean(Gdict, weight_key): N_photons = np.zeros(GSUM.N_per_stancil.size) counter = 0 - for k, I in Gdict.items(): + for _,I in Gdict.items(): I = I.squeeze() print(len(I.x)) if len(I.x) != 0: @@ -742,28 +742,25 @@ def reconstruct_displacement(Gx_1, Gk_1, T3, k_thresh): # save collected spectral data Gk_v2.to_netcdf(save_path + "/B06_" + ID_name + "_gFT_k_corrected.nc") -Gx + # save real space data Gx.to_netcdf(save_path + "/B06_" + ID_name + "_gFT_x_corrected.nc") -try: - io.save_pandas_table( - B2_v2, "B06_" + ID_name + "_B06_corrected_resid", save_path - ) # all photos but heights adjusted and with distance coordinate -except: - os.remove(save_path + "B06_" + ID_name + "_B06_corrected_resid.h5") - io.save_pandas_table( - B2_v2, "B06_" + ID_name + "_B06_corrected_resid", save_path - ) # all photos but heights adjusted and with distance coordinate -try: - io.save_pandas_table( - B3_v2, "B06_" + ID_name + "_binned_resid", save_path - ) # regridding heights -except: - os.remove(save_path + "B06_" + ID_name + "_binned_resid.h5") - io.save_pandas_table( - B3_v2, "B06_" + ID_name + "_binned_resid", save_path - ) # regridding heights + +def save_table(data, tablename, save_path): + try: + io.save_pandas_table(data, tablename, save_path) + except Exception as e: + tabletoremove = save_path + tablename + ".h5" + print(e, f"Failed to save table. 
Removing {tabletoremove} and re-trying..") + os.remove(tabletoremove) + io.save_pandas_table(data, tablename, save_path) + +B06_ID_name = "B06_" + ID_name +table_names = [B06_ID_name + suffix for suffix in ["_B06_corrected_resid", "_binned_resid"]] +data = [B2_v2, B3_v2] +for tablename, data in zip(table_names, data): + save_table(data, tablename, save_path) MT.json_save( "B06_success", From 87e8546811b0aa5772755318120c9d36e614488b Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Thu, 1 Feb 2024 14:49:51 -0500 Subject: [PATCH 21/30] removed unsed imports --- src/icesat2_tracks/analysis_db/B06_correct_separate_var.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py b/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py index 8c2962bc..6825a64f 100644 --- a/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py +++ b/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py @@ -20,7 +20,6 @@ import time import copy import icesat2_tracks.ICEsat2_SI_tools.generalized_FT as gFT -from scipy.ndimage import label import pandas as pd import xarray as xr import numpy as np @@ -201,7 +200,6 @@ def define_noise_wavenumber_tresh_simple( m slope of the fitted line b intersect of the fitted line """ - from scipy.ndimage.measurements import label if k_end_lim is None: k_end_lim = data_xr.k[-1] From d60aaf6bf9a2cd8ad9872d88030e26a53115e6c5 Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Thu, 1 Feb 2024 22:19:02 +0000 Subject: [PATCH 22/30] fix: update io to iotools in B05_define_angle --- src/icesat2_tracks/analysis_db/B05_define_angle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/icesat2_tracks/analysis_db/B05_define_angle.py b/src/icesat2_tracks/analysis_db/B05_define_angle.py index b33c7358..f201c207 100644 --- a/src/icesat2_tracks/analysis_db/B05_define_angle.py +++ b/src/icesat2_tracks/analysis_db/B05_define_angle.py @@ -12,7 +12,7 @@ font_for_print ) -import icesat2_tracks.ICEsat2_SI_tools.io as io +import icesat2_tracks.ICEsat2_SI_tools.iotools as io import icesat2_tracks.ICEsat2_SI_tools.spectral_estimates as spec import xarray as xr From 4e911df9f45f9a937e87db8172647d40fd5c1ba8 Mon Sep 17 00:00:00 2001 From: Camilo Diaz Date: Fri, 2 Feb 2024 13:12:50 -0500 Subject: [PATCH 23/30] Fix import of io script. 
 The io.py was renamed to iotools.py
---
 src/icesat2_tracks/analysis_db/B06_correct_separate_var.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py b/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py
index 6825a64f..14d485df 100644
--- a/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py
+++ b/src/icesat2_tracks/analysis_db/B06_correct_separate_var.py
@@ -14,7 +14,7 @@
 )
 
 import h5py
-import icesat2_tracks.ICEsat2_SI_tools.io as io
+import icesat2_tracks.ICEsat2_SI_tools.iotools as io
 import icesat2_tracks.local_modules.m_tools_ph3 as MT
 from icesat2_tracks.local_modules import m_general_ph3 as M
 import time

From e6132b16709bad57b06a0feef367ccd1d356d35a Mon Sep 17 00:00:00 2001
From: Camilo Diaz
Date: Fri, 2 Feb 2024 13:56:36 -0500
Subject: [PATCH 24/30] uncommenting step5 to test in CI

---
 .github/workflows/test-B01_SL_load_single_file.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test-B01_SL_load_single_file.yml b/.github/workflows/test-B01_SL_load_single_file.yml
index fa227d1d..1f5241a3 100644
--- a/.github/workflows/test-B01_SL_load_single_file.yml
+++ b/.github/workflows/test-B01_SL_load_single_file.yml
@@ -29,5 +29,5 @@ jobs:
         run: python src/icesat2_tracks/analysis_db/B03_plot_spectra_ov.py SH_20190502_05180312 SH_testSLsinglefile2 True
       - name: fourth step IOWAGA thredds
         run: python src/icesat2_tracks/analysis_db/A02c_IOWAGA_thredds_prior.py SH_20190502_05180312 SH_testSLsinglefile2 True
-      # - name: Fifth step B04_angle
-      #   run: python src/icesat2_tracks/analysis_db/B04_angle.py SH_20190502_05180312 SH_testSLsinglefile2 True
+      - name: Fifth step B04_angle
+        run: python src/icesat2_tracks/analysis_db/B04_angle.py SH_20190502_05180312 SH_testSLsinglefile2 True
\ No newline at end of file

From 550193e7d369d7524f78537870c67de4e328afeb Mon Sep 17 00:00:00 2001
From: Camilo Diaz
Date: Fri, 2 Feb 2024 15:05:35 -0500
Subject: [PATCH 25/30] imports moved to top of the file

---
 .../ICEsat2_SI_tools/generalized_FT.py | 35 +++++--------------
 1 file changed, 9 insertions(+), 26 deletions(-)

diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py b/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py
index 355d50b3..9037bb7a 100644
--- a/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py
+++ b/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py
@@ -1,6 +1,15 @@
 import numpy as np
 import matplotlib.pyplot as plt
 from icesat2_tracks.ICEsat2_SI_tools import lanczos, spectral_estimates as spec
+import xarray as xr
+import matplotlib.pyplot as plt
+import copy
+import pandas as pd
+import time
+from scipy.signal import detrend
+from numpy import linalg
+import lmfit as LM
+import icesat2_tracks.local_modules.JONSWAP_gamma as spectal_models
 
 
 def rebin(data, dk):
@@ -119,8 +128,6 @@ def define_weight_shutter(weight, k, Ncut=3):
 
 
 def make_xarray_from_dict(D, name, dims, coords):
-    import xarray as xr
-
     D_return = dict()
     for xi, I in D.items():
         coords["x"] = xi
@@ -155,8 +162,6 @@ def define_weights(stancil, prior, x, y, dx, k, max_nfev, plot_flag=False):
     weight = weight * define_weight_shutter(weight, k, Ncut=3)
 
     if plot_flag:
-        import matplotlib.pyplot as plt
-
         plt.plot(k, weight, zorder=12, c="darkgreen", linewidth=0.8, label=weight_name)
 
     # peak normlize weights by std of data
@@ -226,9 +231,6 @@ def cal_spectrogram(
         self.GG, params_dataframe
         params_dataframe is a pd.DataFrame that containes all the parameters of the fitting process (and may contain
uncertainties too once they are calculated) """ - import xarray as xr - import copy - import pandas as pd X = self.x if x is None else x # all x positions DATA = self.data if data is None else data # all data points @@ -244,8 +246,6 @@ def calc_gFT_apply(stancil, prior): windows the data accoding to stencil and applies LS spectrogram returns: stancil center, spectrum for this stencil, number of datapoints in stancil """ - import matplotlib.pyplot as plt - import time ta = time.perf_counter() x_mask = (stancil[0] <= X) & (X <= stancil[-1]) @@ -274,8 +274,6 @@ def calc_gFT_apply(stancil, prior): FT = generalized_Fourier(x, y, self.k) if plot_flag: - import matplotlib.pyplot as plt - plt.figure(figsize=(3.34, 1.8), dpi=300) # define weights. Weights are normalized to 1 @@ -628,14 +626,11 @@ def calc_var(self): def parceval(self, add_attrs=True, weight_data=False): "test Parceval theorem" - import copy DATA = self.data X = self.x def get_stancil_var_apply(stancil): - from scipy.signal import detrend - "returns the variance of yy for stancil" x_mask = (stancil[0] < X) & (X <= stancil[-1]) idata = DATA[x_mask] @@ -749,7 +744,6 @@ def __init__(self, x, ydata, k): """ non_dimensionalize (bool, default=True) if True, then the data and R_data_uncertainty is non-dimensionalized by the std of the data """ - import numpy as np self.x, self.ydata, self.k = x, ydata, k self.M = self.k.size # number of wavenumbers @@ -784,8 +778,6 @@ def define_problem(self, P_weight, R_data_uncertainty): self.R_1d = R_data_uncertainty def solve(self): - from numpy import linalg - inv = linalg.inv """ solves the linear inverse problem, return hessian and p_hat @@ -887,8 +879,6 @@ def get_stats(self, dk, Nx_full, print_flag=False): class get_prior_spec: def __init__(self, freq, data): - import lmfit as LM - self.LM = LM self.data = data self.freq = freq @@ -910,7 +900,6 @@ def set_parameters(self, flim=None): self.params LMfit.parameters class needed for optimization """ - import numpy as np params = self.LM.Parameters() @@ -940,8 +929,6 @@ def model_func(self, f, params): ) def non_dim_spec_model(self, f, f_max, amp, gamma=1, angle_rad=0): - import icesat2_tracks.local_modules.JONSWAP_gamma as spectal_models - f_true = f * np.cos(angle_rad) model = spectal_models.JONSWAP_default_alt(f_true, f_max, 20, gamma=gamma) model = amp * model / np.nanmean(model) @@ -974,13 +961,9 @@ def optimize(self, fitting_args=None, method="dual_annealing", max_nfev=None): return self.fitter def plot_data(self): - import matplotlib.pyplot as plt - plt.plot(self.freq, self.data, "k") def plot_model(self, pars): - import matplotlib.pyplot as plt - plt.plot(self.freq, self.model_func(self.freq, pars), "b--") def runningmean(self, var, m, tailcopy=False): From d88898aef70bcc9e80a278506b2ef2ce0e10a4a4 Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Fri, 2 Feb 2024 15:46:36 -0500 Subject: [PATCH 26/30] Update generalized_FT.py fix: remove duplicate import and reorganize --- .../ICEsat2_SI_tools/generalized_FT.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py b/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py index 9037bb7a..85158d1e 100644 --- a/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py +++ b/src/icesat2_tracks/ICEsat2_SI_tools/generalized_FT.py @@ -1,14 +1,15 @@ +import copy +import time + +from numpy import linalg import numpy as np -import matplotlib.pyplot as plt -from icesat2_tracks.ICEsat2_SI_tools import lanczos, spectral_estimates as 
spec +import pandas as pd import xarray as xr import matplotlib.pyplot as plt -import copy -import pandas as pd -import time from scipy.signal import detrend -from numpy import linalg import lmfit as LM + +from icesat2_tracks.ICEsat2_SI_tools import lanczos, spectral_estimates as spec import icesat2_tracks.local_modules.JONSWAP_gamma as spectal_models From 2b25637e900f36a7c9225abcfa19a36742c01e61 Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Mon, 5 Feb 2024 21:33:40 +0000 Subject: [PATCH 27/30] feat: conditional output suppression Also, add some docstrings and doctests --- .../analysis_db/B01_SL_load_single_file.py | 3 +- src/icesat2_tracks/clitools.py | 108 +++++++++++++++--- 2 files changed, 93 insertions(+), 18 deletions(-) diff --git a/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py b/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py index 176f768b..0adb2216 100644 --- a/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py +++ b/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py @@ -90,6 +90,7 @@ def run_B01_SL_load_single_file( ID_flag: bool = True, plot_flag: bool = True, output_dir: str = typer.Option(None, callback=validate_output_dir), + verbose: bool = False ): """ Open an ICEsat2 tbeam_stats.pyrack, apply filters and corrections, and output smoothed photon heights on a regular grid in an .nc file. @@ -106,7 +107,7 @@ def run_B01_SL_load_single_file( matplotlib.use("Agg") # prevent plot windows from opening # Select region and retrieve batch of tracks - with suppress_stdout(): + with suppress_stdout(verbose): track_name, batch_key, ID_flag = io.init_from_input( [ None, diff --git a/src/icesat2_tracks/clitools.py b/src/icesat2_tracks/clitools.py index b13f3ede..3567a9a9 100644 --- a/src/icesat2_tracks/clitools.py +++ b/src/icesat2_tracks/clitools.py @@ -9,14 +9,17 @@ @contextmanager -def suppress_stdout(): - with open(os.devnull, "w") as devnull: - old_stdout = sys.stdout - sys.stdout = devnull - try: - yield - finally: - sys.stdout = old_stdout +def suppress_stdout(verbose=False): + if verbose: + yield + else: + with open(os.devnull, "w") as devnull: + old_stdout = sys.stdout + sys.stdout = devnull + try: + yield + finally: + sys.stdout = old_stdout # Callbacks for typer @@ -32,9 +35,43 @@ def validate_pattern_wrapper( return value -def validate_track_name( - ctx: typer.Context, param: typer.CallbackParam, value: str -) -> str: +def validate_track_name(ctx: typer.Context, param: typer.CallbackParam, value: str) -> str: + """ + Validate the track name `value` based on a specific pattern (see below). + + Args: + ctx (typer.Context): The context in which the command is being invoked. + param (typer.CallbackParam): The parameter that is being validated. + value (str): The value of the parameter. + + Returns: + str: The validated track name. + + Raises: + click.exceptions.BadParameter: If the track name does not match the pattern. + + Pattern: + YYYYMMDDHHMMSS_XXXXXXXX_XXX_XX + where: + YYYYMMDDHHMMSS is a timestamp, + XXXXXXXX is an 8-digit number, + XXX is a 3-digit number, + XX is a 2-digit number. + + Example: + >>> validate_track_name(None, None, '20220101123000_12345678_123_12') + '20220101123000_12345678_123_12' + >>> validate_track_name(None, None, '20221231115959_87654321_321_21') + '20221231115959_87654321_321_21' + >>> validate_track_name(None, None, '20220228235959_00000000_000_00') + '20220228235959_00000000_000_00' + + Doctest: + >>> validate_track_name(None, None, 'invalid_track_name') + Traceback (most recent call last): + ... 
+ click.exceptions.BadParameter: track_name must be in the format: YYYYMMDDHHMMSS_XXXXXXXX_XXX_XX + """ pattern = r"\d{4}(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])([01][0-9]|2[0-3])([0-5][0-9]){2}_\d{8}_\d{3}_\d{2}" error_message = "track_name must be in the format: YYYYMMDDHHMMSS_XXXXXXXX_XXX_XX" return validate_pattern_wrapper( @@ -46,9 +83,46 @@ def validate_track_name( ) -def validate_batch_key( - ctx: typer.Context, param: typer.CallbackParam, value: str -) -> str: +def validate_batch_key(ctx: typer.Context, param: typer.CallbackParam, value: str) -> str: + """ + Validate a batch key based on a specific pattern (see below). + + Args: + ctx (typer.Context): The context in which the command is being invoked. + param (typer.CallbackParam): The parameter that is being validated. + value (str): The value of the parameter. + + Returns: + str: The validated batch key. + + Raises: + click.exceptions.BadParameter: If the batch key does not match the pattern. + + Pattern: + .*_.* + where: + .* is any character (including none), + _ is a literal underscore, + .* is any character (including none). + + Example: + >>> validate_batch_key(None, None, 'SH_testSLsinglefile2') + 'SH_testSLsinglefile2' + >>> validate_batch_key(None, None, 'batch_key') + 'batch_key' + >>> validate_batch_key(None, None, '_') + '_' + + Doctest: + >>> validate_batch_key(None, None, '') + Traceback (most recent call last): + ... + click.exceptions.BadParameter: batch_key must be in the format 'SH_testSLsinglefile2' + >>> validate_batch_key(None, None, 'badbatchkey') + Traceback (most recent call last): + ... + click.exceptions.BadParameter: batch_key must be in the format 'SH_testSLsinglefile2' + """ pattern = r".*_.*" error_message = "batch_key must be in the format 'SH_testSLsinglefile2'" return validate_pattern_wrapper( @@ -80,11 +154,11 @@ def echoparam(text: str, value, textcolor: str = "green", valuecolor: str = "whi echo(f"{colored(text,textcolor)}: {colored(value, valuecolor)}") -def report_input_parameters(heading: str = "** Input parameters:", **kargs): +def report_input_parameters(heading: str = "** Input parameters:", **kwargs): echo(heading) - for key in kargs: + for key in kwargs: if key != "args": - echoparam(key, kargs[key]) + echoparam(key, kwargs[key]) def update_paths_mconfig(output_dir, mconfig): From 49fb7ab7bd2c00231d4c966fa36071588feae711 Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Mon, 5 Feb 2024 21:39:40 +0000 Subject: [PATCH 28/30] fix: echo with white => print --- src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py b/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py index 0adb2216..34b5ae87 100644 --- a/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py +++ b/src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py @@ -157,10 +157,10 @@ def run_B01_SL_load_single_file( } maximum_height = 30 # (meters) maximum height past dem_h correction - echo("STARTS", "white") - echo("Fetching ATL03 data from sliderule", "white") + print("STARTS") + print("Fetching ATL03 data from sliderule") gdf = icesat2.atl06p(params_yapc, resources=[ATL03_track_name]) - echo("ENDS", "white") + print("ENDS") gdf = sct.correct_and_remove_height(gdf, maximum_height) cdict = dict() From cc5e459e6078de0ab398cd64895bc30fe10654c8 Mon Sep 17 00:00:00 2001 From: Carlos Paniagua Date: Tue, 6 Feb 2024 04:12:43 +0000 Subject: [PATCH 29/30] fix: move pytest to dev deps --- 
From 802725693bd0252ef2a39651d243884e21c8a1ef Mon Sep 17 00:00:00 2001
From: Carlos Paniagua
Date: Tue, 6 Feb 2024 10:20:28 -0500
Subject: [PATCH 30/30] feat: update test workflow and add new CLI commands

---
 .../test-B01_SL_load_single_file.yml |  7 ++-
 pyproject.toml                       |  3 ++
 src/icesat2_tracks/app.py            | 50 +++++++++++++++++++
 3 files changed, 59 insertions(+), 1 deletion(-)
 create mode 100644 src/icesat2_tracks/app.py

diff --git a/.github/workflows/test-B01_SL_load_single_file.yml b/.github/workflows/test-B01_SL_load_single_file.yml
index aefe5e90..b2821de6 100644
--- a/.github/workflows/test-B01_SL_load_single_file.yml
+++ b/.github/workflows/test-B01_SL_load_single_file.yml
@@ -1,4 +1,4 @@
-name: Test B01_SL_load_single_file
+name: Test Steps
 on:
   pull_request: {}
   push:
@@ -23,6 +23,11 @@ jobs:
         run: pip install .
       - name: List dependencies
         run: pip list
+      - name: test icesat2waves app
+        run: icesat2waves --help
+      - name: test command for step 1
+        run: load_single_file --help
       - name: first step B01_SL_load_single_file
         run: python src/icesat2_tracks/analysis_db/B01_SL_load_single_file.py --track-name 20190502052058_05180312_005_01 --batch-key SH_testSLsinglefile2 --output-dir ./work
+

diff --git a/pyproject.toml b/pyproject.toml
index 59c43ef9..ccedb2f6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -157,6 +157,9 @@ test = ["coverage", "pytest >=7.4.4, <8.0.0", "pytest-xdist >=3.5.0, <4.0.0"]
 [project.scripts] # Optional
 #TODO: ADD ANY SCRIPTS WE WANT TO HAVE
 download = "icesat2_tracks.icesat2_tools_scripts.nsidc_icesat2_associated2:main"
+load_single_file = "icesat2_tracks.analysis_db.B01_SL_load_single_file:step1app"
+icesat2waves = "icesat2_tracks.app:app"
+

 # This is configuration specific to the `setuptools` build backend.
 # If you are using a different build backend, you will need to change this.
diff --git a/src/icesat2_tracks/app.py b/src/icesat2_tracks/app.py
new file mode 100644
index 00000000..f2e350be
--- /dev/null
+++ b/src/icesat2_tracks/app.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+"""
+Main CLI for icesat2waves.
+"""
+from typer import Typer, Option
+from icesat2_tracks.analysis_db.B01_SL_load_single_file import (
+    run_B01_SL_load_single_file as _loadfile,
+)
+
+from icesat2_tracks.clitools import (
+    validate_track_name,
+    validate_batch_key,
+    validate_output_dir,
+    validate_track_name_steps_gt_1,
+)
+
+
+app = Typer(add_completion=False)
+validate_track_name_gt_1_opt = Option(..., callback=validate_track_name_steps_gt_1)
+validate_batch_key_opt = Option(..., callback=validate_batch_key)
+validate_output_dir_opt = Option(None, callback=validate_output_dir)
+
+
+def run_job(
+    analysis_func,
+    track_name: str,
+    batch_key: str,
+    ID_flag: bool = True,
+    output_dir: str = validate_output_dir_opt,
+):
+    analysis_func(
+        track_name,
+        batch_key,
+        ID_flag,
+        output_dir,
+    )
+
+
+@app.command(help=_loadfile.__doc__)
+def loadfile(
+    track_name: str = Option(..., callback=validate_track_name),
+    batch_key: str = validate_batch_key_opt,
+    ID_flag: bool = True,
+    output_dir: str = validate_output_dir_opt,
+):
+    run_job(_loadfile, track_name, batch_key, ID_flag, output_dir)
+
+
+if __name__ == "__main__":
+    app()
\ No newline at end of file
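As a possible complement to the `icesat2waves --help` and `load_single_file --help` workflow steps above, a smoke test along these lines could exercise the new Typer app directly. This is only a sketch: it assumes icesat2_tracks (and its heavier dependencies) import cleanly in the test environment.

from typer.testing import CliRunner

from icesat2_tracks.app import app

runner = CliRunner()


def test_app_help():
    # `--help` should exit cleanly and list the registered `loadfile` command.
    result = runner.invoke(app, ["--help"])
    assert result.exit_code == 0
    assert "loadfile" in result.output


if __name__ == "__main__":
    test_app_help()
    print("icesat2waves --help smoke test passed")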