From 90a02a11e30e12c4b3c45c4e8614ab98bc3c2050 Mon Sep 17 00:00:00 2001
From: Meyers-Im <bennetm@stanford.edu>
Date: Fri, 15 Dec 2023 15:44:34 -0800
Subject: [PATCH 01/17] first commit of loss factor analysis algorithm

---
 .../algorithms/loss_factor_analysis.py        | 170 ++++++++++++++++++
 1 file changed, 170 insertions(+)
 create mode 100644 solardatatools/algorithms/loss_factor_analysis.py

diff --git a/solardatatools/algorithms/loss_factor_analysis.py b/solardatatools/algorithms/loss_factor_analysis.py
new file mode 100644
index 00000000..34212795
--- /dev/null
+++ b/solardatatools/algorithms/loss_factor_analysis.py
@@ -0,0 +1,170 @@
+""" Combined Soiling and Degradation Estimation Module
+
+This module is for estimation of degradation and soiling losses from unlabeled
+daily energy production data. Model is of the form
+
+y_t = x_t * d_t * s_t * w_t, for t = 1,...,T
+
+where y_t [kWh] is the measured real daily energy on each day, x_t [kWh] is an ideal yearly baseline of performance,
+and d_t, s_t, and w_t are the loss factors for degradation, soiling, and weather respectively.
+
+"""
+
+from gfosd import Problem
+import gfosd.components as comp
+from spcqe.functions import make_basis_matrix, make_regularization_matrix
+
+
+class LossFactorEstimator:
+    def __init__(self, energy_data, **kwargs):
+        self.energy_data = energy_data
+        log_energy = np.zeros_like(self.energy_data)
+        is_zero = np.isclose(dh.daily_signals.energy, 0, atol=1e-1)
+        log_energy[is_zero] = np.nan
+        log_energy[~is_zero] = np.log(dh.daily_signals.energy[~is_zero])
+        self.log_energy = log_energy
+        self.use_ixs = ~is_zero
+        self.problem = self.make_problem(**kwargs)
+        self.degradation_rate = None
+        self.energy_model = None
+        self.log_energy_model = None
+        self.total_energy_loss = None
+        self.total_percent_loss = None
+        self.degradation_energy_loss = None
+        self.degradation_percent_loss = None
+        self.soiling_energy_loss = None
+        self.soiling_percent_loss = None
+        self.weather_energy_loss = None
+        self.weather_percent_loss = None
+
+    def estimate_losses(self, solver="CLARABEL"):
+        self.problem.decompose(solver=solver)
+        # in the SD formulation, we put the residual term first, so it's the reverse order of how we specify this model (weather last)
+        self.log_energy_model = self.problem.decomposition[::-1]
+        self.energy_model = np.exp(self.log_energy_model)
+        self.degradation_rate = 100 * np.median(
+            (self.energy_model[1][365:] - self.energy_model[1][:-365])
+            / self.energy_model[1][365:]
+        )
+        total_energy = np.sum(self.energy_data[self.use_ixs])
+        self.total_energy_loss = total_energy - np.sum(
+            self.energy_model[0][self.use_ixs]
+        )
+        self.degradation_energy_loss = total_energy - np.sum(
+            np.product(self.energy_model[[True, False, True, True]], axis=0)[
+                self.use_ixs
+            ]
+        )
+        self.soiling_energy_loss = total_energy - np.sum(
+            np.product(self.energy_model[[True, True, False, True]], axis=0)[
+                self.use_ixs
+            ]
+        )
+        self.weather_energy_loss = total_energy - np.sum(
+            np.product(self.energy_model[[True, True, True, False]], axis=0)[
+                self.use_ixs
+            ]
+        )
+        items = [
+            self.degradation_energy_loss,
+            self.soiling_energy_loss,
+            self.weather_energy_loss,
+        ]
+        avg_item = np.average(items)
+        new_items = []
+        for item in items:
+            item -= avg_item
+            item += self.total_energy_loss / len(items)
+            new_items.append(item)
+        (
+            self.degradation_energy_loss,
+            self.soiling_energy_loss,
+            self.weather_energy_loss,
+        ) = new_items
+        self.total_percent_loss = (
+            100 * self.total_energy_loss / np.sum(self.energy_data)
+        )
+        self.degradation_percent_loss = (
+            100 * self.degradation_energy_loss / np.sum(self.energy_data)
+        )
+        self.soiling_percent_loss = (
+            100 * self.soiling_energy_loss / np.sum(self.energy_data)
+        )
+        self.weather_percent_loss = (
+            100 * self.weather_energy_loss / np.sum(self.energy_data)
+        )
+
+        return
+
+    def report(self):
+        if self.total_energy_loss is not None:
+            out = {
+                "degradation rate [%/yr]": self.degradation_rate,
+                "total percent loss [%]": self.total_percent_loss,
+                "degradation percent loss [%]": self.degradation_percent_loss,
+                "soiling percent loss [%]": self.soiling_percent_loss,
+                "weather percent loss [%]": self.weather_percent_loss,
+                "total energy loss [kWh]": self.total_energy_loss,
+                "degradation energy loss [kWh]": self.degradation_energy_loss,
+                "soiling energy loss [kWh]": self.soiling_energy_loss,
+                "weather energy loss [kWh]": self.weather_energy_loss,
+            }
+            return out
+
+    def holdout_validate(self, seed=None, solver="CLARABEL"):
+        residual, test_ix = self.problem.holdout_decompose(seed=seed, solver=solver)
+        error_metric = np.sum(np.abs(residual))
+        return error_metric
+
+    def make_problem(
+        self,
+        tau=0.95,
+        num_harmonics=4,
+        deg_type="linear",
+        include_soiling=True,
+        weight_seasonal=10e-2,
+        weight_soiling_stiffness=1e1,
+        weight_soiling_sparsity=1e-2,
+        weight_deg_nonlinear=10e4,
+    ):
+        # Pinball loss noise
+        c1 = comp.SumQuantile(tau=tau)
+        # Smooth periodic term
+        length = len(self.log_energy)
+        periods = [365.2425]  # average length of a year in days
+        _B = make_basis_matrix(num_harmonics, length, periods)
+        _D = make_regularization_matrix(num_harmonics, weight_seasonal, periods)
+        c2 = comp.Basis(basis=_B, penalty=_D)
+        # Soiling term
+        if include_soiling:
+            c3 = comp.Aggregate(
+                [
+                    comp.Inequality(vmax=0),
+                    comp.SumAbs(weight=weight_soiling_stiffness, diff=2),
+                    comp.SumAbs(weight=weight_soiling_sparsity),
+                ]
+            )
+        else:
+            c3 = comp.Aggregate([comp.NoSlope(), comp.FirstValEqual(value=0)])
+        # Degradation term
+        if deg_type == "linear":
+            c4 = comp.Aggregate([comp.NoCurvature(), comp.FirstValEqual(value=0)])
+        elif deg_type == "nonlinear":
+            n_tot = length
+            n_reduce = int(0.9 * n_tot)
+            bottom_mat = sp.lil_matrix((n_tot - n_reduce, n_reduce))
+            bottom_mat[:, -1] = 1
+            custom_basis = sp.bmat([[sp.eye(n_reduce)], [bottom_mat]])
+            c4 = comp.Aggregate(
+                [
+                    comp.Inequality(vmax=0, diff=1),
+                    comp.SumSquare(diff=2, weight=weight_deg_nonlinear),
+                    comp.FirstValEqual(value=0),
+                    comp.Basis(custom_basis),
+                ]
+            )
+        elif deg_select.value == "none":
+            c4 = comp.Aggregate([comp.NoSlope(), comp.FirstValEqual(value=0)])
+
+        prob = Problem(self.log_energy, [c1, c3, c4, c2])
+        return prob

From 847e96d5b31fea70a8f03d7aa7f36817e1e6a1f2 Mon Sep 17 00:00:00 2001
From: Meyers-Im <bennetm@stanford.edu>
Date: Wed, 7 Feb 2024 13:55:56 -0800
Subject: [PATCH 02/17] updated loss factor decomposition and attribution
 algorithms

---
 .../algorithms/loss_factor_analysis.py        | 269 ++++++++++++++----
 1 file changed, 209 insertions(+), 60 deletions(-)

diff --git a/solardatatools/algorithms/loss_factor_analysis.py b/solardatatools/algorithms/loss_factor_analysis.py
index 34212795..b66dcc06 100644
--- a/solardatatools/algorithms/loss_factor_analysis.py
+++ b/solardatatools/algorithms/loss_factor_analysis.py
@@ -15,15 +15,21 @@
 from spcqe.functions import make_basis_matrix, make_regularization_matrix
 
 
-class LossFactorEstimator:
-    def __init__(self, energy_data, **kwargs):
+class DegradationSoilingEstimator:
+    def __init__(
+        self, energy_data, capacity_change_labels=None, outage_flags=None, **kwargs
+    ):
         self.energy_data = energy_data
         log_energy = np.zeros_like(self.energy_data)
-        is_zero = np.isclose(dh.daily_signals.energy, 0, atol=1e-1)
+        is_zero = np.isclose(energy_data, 0, atol=1e-1)
         log_energy[is_zero] = np.nan
-        log_energy[~is_zero] = np.log(dh.daily_signals.energy[~is_zero])
+        log_energy[~is_zero] = np.log(energy_data[~is_zero])
         self.log_energy = log_energy
         self.use_ixs = ~is_zero
+        if outage_flags is not None:
+            self.use_ixs = np.logical_and(self.use_ixs, ~outage_flags)
+        self.capacity_change_labels = capacity_change_labels
+        self.total_measured_energy = np.sum(self.energy_data[self.use_ixs])
         self.problem = self.make_problem(**kwargs)
         self.degradation_rate = None
         self.energy_model = None
@@ -31,14 +37,19 @@ def __init__(self, energy_data, **kwargs):
         self.total_energy_loss = None
         self.total_percent_loss = None
         self.degradation_energy_loss = None
-        self.degradation_percent_loss = None
         self.soiling_energy_loss = None
-        self.soiling_percent_loss = None
+        self.capacity_change_loss = None
         self.weather_energy_loss = None
         self.weather_percent_loss = None
+        self.outage_energy_loss = None
+        self.degradation_percent = None
+        self.soiling_percent = None
+        self.capacity_change_percent = None
+        self.weather_percent = None
+        self.outage_percent = None
 
     def estimate_losses(self, solver="CLARABEL"):
-        self.problem.decompose(solver=solver)
+        self.problem.decompose(solver=solver, verbose=False)
         # in the SD formulation, we put the residual term first, so it's the reverse order of how we specify this model (weather last)
         self.log_energy_model = self.problem.decomposition[::-1]
         self.energy_model = np.exp(self.log_energy_model)
@@ -46,68 +57,45 @@ def estimate_losses(self, solver="CLARABEL"):
             (self.energy_model[1][365:] - self.energy_model[1][:-365])
             / self.energy_model[1][365:]
         )
+        # self.energy_lost_outages = np.sum(self.energy_model[:, self.use_ixs]) - np.sum(self.energy_model)
         total_energy = np.sum(self.energy_data[self.use_ixs])
-        self.total_energy_loss = total_energy - np.sum(
-            self.energy_model[0][self.use_ixs]
-        )
-        self.degradation_energy_loss = total_energy - np.sum(
-            np.product(self.energy_model[[True, False, True, True]], axis=0)[
-                self.use_ixs
-            ]
-        )
-        self.soiling_energy_loss = total_energy - np.sum(
-            np.product(self.energy_model[[True, True, False, True]], axis=0)[
-                self.use_ixs
-            ]
-        )
-        self.weather_energy_loss = total_energy - np.sum(
-            np.product(self.energy_model[[True, True, True, False]], axis=0)[
-                self.use_ixs
-            ]
-        )
-        items = [
-            self.degradation_energy_loss,
-            self.soiling_energy_loss,
-            self.weather_energy_loss,
-        ]
-        avg_item = np.average(items)
-        new_items = []
-        for item in items:
-            item -= avg_item
-            item += self.total_energy_loss / len(items)
-            new_items.append(item)
-        (
-            self.degradation_energy_loss,
-            self.soiling_energy_loss,
-            self.weather_energy_loss,
-        ) = new_items
-        self.total_percent_loss = (
-            100 * self.total_energy_loss / np.sum(self.energy_data)
-        )
-        self.degradation_percent_loss = (
-            100 * self.degradation_energy_loss / np.sum(self.energy_data)
-        )
-        self.soiling_percent_loss = (
-            100 * self.soiling_energy_loss / np.sum(self.energy_data)
-        )
-        self.weather_percent_loss = (
-            100 * self.weather_energy_loss / np.sum(self.energy_data)
-        )
+        baseline_energy = np.sum(self.energy_model[0])
+        self.total_energy_loss = total_energy - baseline_energy
+        self.total_percent_loss = 100 * self.total_energy_loss / baseline_energy
+
+        out = attribute_losses(self.energy_model, self.use_ixs)
+        self.degradation_energy_loss = out[0]
+        self.soiling_energy_loss = out[1]
+        self.capacity_change_loss = out[2]
+        self.weather_energy_loss = out[3]
+        self.outage_energy_loss = out[4]
+
+        self.degradation_percent = out[0] / self.total_energy_loss
+        self.soiling_percent = out[1] / self.total_energy_loss
+        self.capacity_change_percent = out[2] / self.total_energy_loss
+        self.weather_percent = out[3] / self.total_energy_loss
+        self.outage_percent = out[4] / self.total_energy_loss
 
+        assert np.isclose(
+            self.total_energy_loss,
+            self.degradation_energy_loss
+            + self.soiling_energy_loss
+            + self.capacity_change_loss
+            + self.weather_energy_loss
+            + self.outage_energy_loss,
+        )
         return
 
     def report(self):
         if self.total_energy_loss is not None:
             out = {
                 "degradation rate [%/yr]": self.degradation_rate,
-                "total percent loss [%]": self.total_percent_loss,
-                "degradation percent loss [%]": self.degradation_percent_loss,
-                "soiling percent loss [%]": self.soiling_percent_loss,
-                "weather percent loss [%]": self.weather_percent_loss,
                 "total energy loss [kWh]": self.total_energy_loss,
                 "degradation energy loss [kWh]": self.degradation_energy_loss,
                 "soiling energy loss [kWh]": self.soiling_energy_loss,
+                "capacity change energy loss [kWh]": self.capacity_change_loss,
                 "weather energy loss [kWh]": self.weather_energy_loss,
+                "system outage loss [kWh]": self.outage_energy_loss,
             }
             return out
 
@@ -118,12 +106,12 @@ def holdout_validate(self, seed=None, solver="CLARABEL"):
 
     def make_problem(
         self,
-        tau=0.95,
+        tau=0.9,
         num_harmonics=4,
         deg_type="linear",
         include_soiling=True,
         weight_seasonal=10e-2,
-        weight_soiling_stiffness=1e1,
+        weight_soiling_stiffness=1e0,
         weight_soiling_sparsity=1e-2,
         weight_deg_nonlinear=10e4,
     ):
@@ -141,6 +129,9 @@ def make_problem(
                 [
                     comp.Inequality(vmax=0),
                     comp.SumAbs(weight=weight_soiling_stiffness, diff=2),
+                    comp.SumQuantile(
+                        tau=0.98, weight=10 * weight_soiling_sparsity, diff=1
+                    ),
                     comp.SumAbs(weight=weight_soiling_sparsity),
                 ]
             )
@@ -165,6 +156,164 @@ def make_problem(
             )
         elif deg_select.value == "none":
             c4 = comp.Aggregate([comp.NoSlope(), comp.FirstValEqual(value=0)])
+        # capacity change term — leverage previous analysis from SDT pipeline
+        if self.capacity_change_labels is not None:
+            basis_M = np.zeros((length, len(set(self.capacity_change_labels))))
+            for lb in set(self.capacity_change_labels):
+                slct = np.array(self.capacity_change_labels) == lb
+                basis_M[slct, lb] = 1
+            c5 = comp.Aggregate(
+                [
+                    comp.Inequality(vmax=0),
+                    comp.Basis(basis=basis_M),
+                    comp.SumAbs(weight=1e-6),
+                ]
+            )
+        else:
+            c5 = comp.Aggregate([comp.NoSlope(), comp.FirstValEqual(value=0)])
 
-        prob = Problem(self.log_energy, [c1, c3, c4, c2])
+        prob = Problem(self.log_energy, [c1, c5, c3, c4, c2], use_set=self.use_ixs)
         return prob
+
+    def plot_pie(self):
+        plt.pie(
+            [
+                np.clip(-self.degradation_energy_loss, 0, np.inf),
+                np.clip(-self.soiling_energy_loss, 0, np.inf),
+                np.clip(-self.capacity_change_loss, 0, np.inf),
+                np.clip(-self.weather_energy_loss, 0, np.inf),
+                np.clip(-self.outage_energy_loss, 0, np.inf),
+            ],
+            labels=[
+                "degradation",
+                "soiling",
+                "capacity change",
+                "weather",
+                "outages",
+            ],
+            autopct="%1.1f%%",
+        )
+        plt.title("System loss breakdown")
+        return plt.gcf()
+
+    def plot_waterfall(self):
+        index = [
+            "baseline",
+            "weather",
+            "outages",
+            "capacity changes",
+            "soiling",
+            "degradation",
+        ]
+        bl = np.sum(self.energy_model[0])
+        data = {
+            "amount": [
+                bl,
+                self.weather_energy_loss,
+                self.outage_energy_loss,
+                self.capacity_change_loss,
+                self.soiling_energy_loss,
+                self.degradation_energy_loss,
+            ]
+        }
+        fig = waterfall_plot(data, index)
+        return fig
+
+
+def model_wrapper(energy_model, use_ixs):
+    n = energy_model.shape[0]
+
+    def model_f(**kwargs):
+        defaults = {f"arg{i + 1}": False for i in range(n)}
+        defaults.update(kwargs)
+        slct = [True] + [item for _, item in defaults.items()]
+        apply_outages = slct[-1]
+        slct = slct[:-1]
+        model_select = energy_model[slct]
+        daily_energy = np.product(model_select, axis=0)
+        if apply_outages:
+            daily_energy = daily_energy[use_ixs]
+        return np.sum(daily_energy)
+
+    return model_f
+
+
+def enumerate_paths_full(origin, destination, path=None):
+    """
+    recursive algorithm for generating all possible monotonically increasing paths between
+    two points on a n-dimensional hypercube
+    """
+    origin = list(origin)
+    destination = list(destination)
+    correct_ordering = np.all(
+        np.asarray(destination, dtype=int) - np.asarray(origin, dtype=int) >= 0
+    )
+    if not correct_ordering:
+        raise Exception("destination must be larger than origin in all dimensions")
+    if path is None:
+        path = []
+    paths = []
+    if origin == destination:
+        # a path has been completed
+        paths.append(path + [origin])
+    else:
+        # find the next index that can be incremented
+        for i in range(len(origin)):
+            if origin[i] != destination[i]:
+                # create the next point in this path
+                next_position = list(origin)
+                next_position[i] = destination[0]
+                # recurse to finish all paths that begin on this path
+                paths.extend(
+                    enumerate_paths_full(next_position, destination, path + [origin])
+                )
+
+    return paths
+
+
+def enumerate_paths(n, dtype=int):
+    """
+    enumerates all possible paths from the origin to the ones vector in R^n
+    """
+    origin = np.zeros(n, dtype=dtype)
+    destination = np.ones(n, dtype=dtype)
+    return np.asarray(enumerate_paths_full(origin, destination))
+
+
+def attribute_losses(energy_model, use_ixs):
+    """This function assigns a total attribution to each loss factor, given a
+    multiplicative loss factor model relative to a baseline, using Shapley
+    attribution.
+
+    :param energy_model: a multiplicative decomposition of PV daily energy, with the
+        baseline first -- ie: baseline, degradation, soiling, capacity changes, and
+        weather (residual)
+    :type energy_model: 2d numpy array of shape n x T, where T is the number of days
+        and n is the number of model factors
+    :param use_ixs: a numpy boolean index where False records a system outage
+    :type use_ixs: 1d numpy boolean array
+
+    :return: a list of energy loss attributions, in the input order
+    :rtype: 1d numpy float array
+    """
+    model_f = model_wrapper(energy_model, use_ixs)
+    paths = enumerate_paths(energy_model.shape[0], dtype=bool)
+    energy_estimates = np.zeros((paths.shape[0], paths.shape[1]))
+    for ix, path in enumerate(paths):
+        for jx, point in enumerate(path):
+            kwargs = {f"arg{i + 1}": v for i, v in enumerate(point)}
+            energy = model_f(**kwargs)
+            energy_estimates[ix, jx] = energy
+    lifts = np.diff(energy_estimates, axis=1)
+    path_diffs = np.diff(paths, axis=1)
+    ordering = np.argmax(path_diffs, axis=-1)
+    ordered_lifts = np.take_along_axis(lifts, np.argsort(ordering, axis=1), axis=1)
+    # print(energy_estimates)
+    # print(lifts)
+    # print(ordered_lifts)
+    attributions = np.average(ordered_lifts, axis=0)
+    total_energy = energy_estimates[0, -1]
+    baseline_energy = energy_estimates[0, 0]
+    # check that we've attributed all losses
+    assert np.isclose(np.sum(attributions), total_energy - baseline_energy)
+    return attributions

From 9ef16ecb16d50b47f664eda5e639bc2f2b304c5c Mon Sep 17 00:00:00 2001
From: Meyers-Im <bennetm@stanford.edu>
Date: Wed, 28 Feb 2024 16:15:25 -0800
Subject: [PATCH 03/17] working implemenation of loss factor analysis

---
 solardatatools/algorithms/__init__.py         |  1 +
 .../algorithms/loss_factor_analysis.py        | 92 ++++++++++++++++++-
 2 files changed, 92 insertions(+), 1 deletion(-)

diff --git a/solardatatools/algorithms/__init__.py b/solardatatools/algorithms/__init__.py
index f24986d9..bafe3c49 100644
--- a/solardatatools/algorithms/__init__.py
+++ b/solardatatools/algorithms/__init__.py
@@ -6,3 +6,4 @@
 from solardatatools.algorithms.soiling import SoilingAnalysis
 from solardatatools.algorithms.soiling import soiling_seperation_old
 from solardatatools.algorithms.soiling import soiling_seperation
+from solardatatools.algorithms.loss_factor_analysis import LossFactorAnalysis
diff --git a/solardatatools/algorithms/loss_factor_analysis.py b/solardatatools/algorithms/loss_factor_analysis.py
index b66dcc06..2396d266 100644
--- a/solardatatools/algorithms/loss_factor_analysis.py
+++ b/solardatatools/algorithms/loss_factor_analysis.py
@@ -10,12 +10,15 @@
 
 """
 
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
 from gfosd import Problem
 import gfosd.components as comp
 from spcqe.functions import make_basis_matrix, make_regularization_matrix
 
 
-class DegradationSoilingEstimator:
+class LossFactorAnalysis:
     def __init__(
         self, energy_data, capacity_change_labels=None, outage_flags=None, **kwargs
     ):
@@ -219,6 +222,27 @@ def plot_waterfall(self):
         fig = waterfall_plot(data, index)
         return fig
 
+    def plot_decomposition(self, figsize=(16, 8.5)):
+        _fig_decomp = self.problem.plot_decomposition(
+            exponentiate=True, figsize=figsize
+        )
+        _ax = _fig_decomp.axes
+        _ax[0].plot(
+            np.arange(len(self.energy_data))[~self.use_ixs],
+            self.energy_model[-1, ~self.use_ixs],
+            color="red",
+            marker=".",
+            ls="none",
+        )
+        _ax[0].set_title("weather and system outages")
+        _ax[1].set_title("capacity changes")
+        _ax[2].set_title("soiling")
+        _ax[3].set_title("degradation")
+        _ax[4].set_title("baseline")
+        _ax[5].set_title("measured energy (green) and model minus weather")
+        plt.tight_layout()
+        return _fig_decomp
+
 
 def model_wrapper(energy_model, use_ixs):
     n = energy_model.shape[0]
@@ -317,3 +341,69 @@ def attribute_losses(energy_model, use_ixs):
     # check that we've attributed all losses
     assert np.isclose(np.sum(attributions), total_energy - baseline_energy)
     return attributions
+
+
+def waterfall_plot(data, index, figsize=(10, 4)):
+    # Store data and create a blank series to use for the waterfall
+    trans = pd.DataFrame(data=data, index=index)
+    blank = trans.amount.cumsum().shift(1).fillna(0)
+
+    # Get the net total number for the final element in the waterfall
+    total = trans.sum().amount
+    trans.loc["measured energy"] = total
+    blank.loc["measured energy"] = total
+
+    # The steps graphically show the levels as well as used for label placement
+    step = blank.reset_index(drop=True).repeat(3).shift(-1)
+    step[1::3] = np.nan
+
+    # When plotting the last element, we want to show the full bar,
+    # Set the blank to 0
+    blank.loc["measured energy"] = 0
+
+    # Plot and label
+    my_plot = trans.plot(
+        kind="bar",
+        stacked=True,
+        bottom=blank,
+        legend=None,
+        figsize=figsize,
+        title="System Loss Factor Waterfall",
+    )
+    my_plot.plot(step.index, step.values, "k")
+    my_plot.set_xlabel("Loss Factors")
+    my_plot.set_ylabel("Energy (Wh)")
+
+    # Get the y-axis position for the labels
+    y_height = trans.amount.cumsum().shift(1).fillna(0)
+
+    # Get an offset so labels don't sit right on top of the bar
+    max = trans.max()
+    max = max.iloc[0]
+    neg_offset = max / 25
+    pos_offset = max / 50
+    plot_offset = int(max / 15)
+
+    # Start label loop
+    loop = 0
+    for index, row in trans.iterrows():
+        # For the last item in the list, we don't want to double count
+        if row["amount"] == total:
+            y = y_height.iloc[loop]
+        else:
+            y = y_height.iloc[loop] + row["amount"]
+        # Determine if we want a neg or pos offset
+        if row["amount"] > 0:
+            y += pos_offset
+        else:
+            y -= neg_offset
+        my_plot.annotate("{:,.0f}".format(row["amount"]), (loop, y), ha="center")
+        loop += 1
+
+    # Scale up the y axis so there is room for the labels
+    my_plot.set_ylim(0, blank.max() + int(plot_offset))
+    # Rotate the labels
+    my_plot.set_xticklabels(trans.index, rotation=0)
+    fig = my_plot.get_figure()
+    fig.set_layout_engine(layout="tight")
+    return fig

From 4739993d8a488bff8d3aa64c0aee4afb54617b6e Mon Sep 17 00:00:00 2001
From: Meyers-Im <bennetm@stanford.edu>
Date: Wed, 28 Feb 2024 16:19:10 -0800
Subject: [PATCH 04/17] adding comment

---
 solardatatools/algorithms/loss_factor_analysis.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/solardatatools/algorithms/loss_factor_analysis.py b/solardatatools/algorithms/loss_factor_analysis.py
index 2396d266..ed16bd8c 100644
--- a/solardatatools/algorithms/loss_factor_analysis.py
+++ b/solardatatools/algorithms/loss_factor_analysis.py
@@ -148,6 +148,7 @@ def make_problem(
             n_reduce = int(0.9 * n_tot)
             bottom_mat = sp.lil_matrix((n_tot - n_reduce, n_reduce))
             bottom_mat[:, -1] = 1
+            # don't allow any downward shifts in the last 10% of the data (constrain values to be equal)
             custom_basis = sp.bmat([[sp.eye(n_reduce)], [bottom_mat]])
             c4 = comp.Aggregate(
                 [

From b6a0fbd202f472af47fe40fd0ae2e0c849dac3f0 Mon Sep 17 00:00:00 2001
From: Meyers-Im <bennetm@stanford.edu>
Date: Thu, 14 Mar 2024 09:47:29 -0700
Subject: [PATCH 05/17] monte carlo sampling

---
 .../algorithms/loss_factor_analysis.py        | 61 +++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/solardatatools/algorithms/loss_factor_analysis.py b/solardatatools/algorithms/loss_factor_analysis.py
index ed16bd8c..cff0cac1 100644
--- a/solardatatools/algorithms/loss_factor_analysis.py
+++ b/solardatatools/algorithms/loss_factor_analysis.py
@@ -34,6 +34,7 @@ def __init__(
         self.capacity_change_labels = capacity_change_labels
         self.total_measured_energy = np.sum(self.energy_data[self.use_ixs])
         self.problem = self.make_problem(**kwargs)
+        self.user_settings = kwargs
         self.degradation_rate = None
         self.energy_model = None
         self.log_energy_model = None
@@ -51,6 +52,66 @@ def __init__(
         self.weather_percent = None
         self.outage_percent = None
 
+    def estimate_degradation_rate(
+        self,
+        max_samples=500,
+        median_tol=1e-3,
+        confidence_tol=1e-2,
+        fraction_hold=0.2,
+        method="median_unbiased",
+        debug=False,
+    ):
+        old_ixs = np.copy(self.use_ixs)
+        change_is_small_now = False
+        change_is_small_window = False
+        output = pd.DataFrame(columns=["tau", "weight", "deg"])
+        running_stats = pd.DataFrame(columns=["p50", "p025", "p975"])
+        counter = 0
+        while not (change_is_small_now and change_is_small_window):
+            # print(counter)
+            tau = np.random.uniform(0.85, 0.95)
+            weight = np.random.uniform(0.5, 5)
+            good_ixs = np.arange(len(self.use_ixs))[self.use_ixs]
+            num_values = int(len(good_ixs) * fraction_hold)
+            # select random index locations for removal
+            selected_ixs = np.random.choice(good_ixs, size=num_values, replace=False)
+            msk = np.zeros(len(self.use_ixs), dtype=bool)
+            msk[selected_ixs] = True
+            new_ixs = ~np.logical_or(~self.use_ixs, msk)
+            self.use_ixs = new_ixs
+            # remake modified problem
+            self.problem = self.make_problem(tau=tau, weight_soiling_stiffness=weight)
+            # run signal decomposition and shapley attribution
+            self.estimate_losses()
+            # record running results
+            output.loc[counter] = [tau, weight, self.degradation_rate]
+            running_stats.loc[counter] = [
+                np.median(output["deg"]),
+                np.quantile(output["deg"], 0.025, method=method),
+                np.quantile(output["deg"], 0.975, method=method),
+            ]
+            self.use_ixs = old_ixs
+            counter += 1
+            # check exit conditions
+            if counter < 10:
+                # get at least 10 samples
+                continue
+            elif counter > max_samples:
+                # don't go over max_samples
+                break
+            diffs = np.diff(running_stats, axis=0)
+            change_is_small_now = np.all(
+                np.abs(diffs[-1]) <= [median_tol, confidence_tol, confidence_tol]
+            )
+            change_is_small_window = np.all(
+                np.average(np.abs(diffs[-10:]), axis=0)
+                <= [median_tol, confidence_tol, confidence_tol]
+            )
+            if debug:
+                print(counter, running_stats[-1], diffs[-1])
+        self.problem = self.make_problem(**self.user_settings)
+        return output, running_stats
+
     def estimate_losses(self, solver="CLARABEL"):
         self.problem.decompose(solver=solver, verbose=False)
         # in the SD formulation, we put the residual term first, so it's the reverse order of how we specify this model (weather last)

From 8efd686370e2e373cf650566bd5b4274e52a05c1 Mon Sep 17 00:00:00 2001
From: Meyers-Im <bennetm@stanford.edu>
Date: Thu, 14 Mar 2024 10:10:27 -0700
Subject: [PATCH 06/17] updating requirements

---
 requirements.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 9e05dc2d..1a13ab6e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,3 +18,5 @@ sig-decomp
 osqp
 clarabel
 qss
+tqdm
+git+https://github.com/cvxgrp/spcqe.git

From 26596d4573eb2b7ef220a31c54f851b76998588e Mon Sep 17 00:00:00 2001
From: Meyers-Im <bennetm@stanford.edu>
Date: Thu, 14 Mar 2024 11:01:21 -0700
Subject: [PATCH 07/17] updates to MC subroutine

---
 .../algorithms/loss_factor_analysis.py        | 35 +++++++++++++++++--
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/solardatatools/algorithms/loss_factor_analysis.py b/solardatatools/algorithms/loss_factor_analysis.py
index cff0cac1..b69e829a 100644
--- a/solardatatools/algorithms/loss_factor_analysis.py
+++ b/solardatatools/algorithms/loss_factor_analysis.py
@@ -16,6 +16,7 @@
 from gfosd import Problem
 import gfosd.components as comp
 from spcqe.functions import make_basis_matrix, make_regularization_matrix
+from tqdm import tqdm
 
 
 class LossFactorAnalysis:
@@ -59,6 +60,7 @@ def estimate_degradation_rate(
         confidence_tol=1e-2,
         fraction_hold=0.2,
         method="median_unbiased",
+        verbose=False,
         debug=False,
     ):
         old_ixs = np.copy(self.use_ixs)
@@ -67,8 +69,29 @@ def estimate_degradation_rate(
         output = pd.DataFrame(columns=["tau", "weight", "deg"])
         running_stats = pd.DataFrame(columns=["p50", "p025", "p975"])
         counter = 0
+        if verbose:
+            print(
+                """
+            ************************************************
+            * Solar Data Tools Degradation Estimation Tool *
+            ************************************************
+
+            Monte Carlo sampling to generate a distributional estimate
+            of the degradation rate [%/yr]
+
+            The distribution typically stabilizes in 50-100 samples.
+
+            Author: Bennet Meyers, SLAC
+
+            This material is based upon work supported by the U.S. Department
+            of Energy's Office of Energy Efficiency and Renewable Energy (EERE)
+            under the Solar Energy Technologies Office Award Number 38529.\n
+            """
+            )
+            progress = tqdm()
         while not (change_is_small_now and change_is_small_window):
-            # print(counter)
+            if verbose:
+                progress.update()
             tau = np.random.uniform(0.85, 0.95)
             weight = np.random.uniform(0.5, 5)
             good_ixs = np.arange(len(self.use_ixs))[self.use_ixs]
@@ -107,8 +130,14 @@ def estimate_degradation_rate(
                 np.average(np.abs(diffs[-10:]), axis=0)
                 <= [median_tol, confidence_tol, confidence_tol]
             )
-            if debug:
-                print(counter, running_stats[-1], diffs[-1])
+            if verbose and counter % 5 == 0:
+                vn = running_stats.values[-1]
+                progress.write(
+                    f"P50, P02.5, P97.5: {vn[0]:.3f}, {vn[1]:.3f}, {vn[2]:.3f}"
+                )
+                progress.write(
+                    f"changes: {diffs[-1][0]:.3e}, {diffs[-1][1]:.3e}, {diffs[-1][2]:.3e}"
+                )
         self.problem = self.make_problem(**self.user_settings)
         return output, running_stats
 

From ea87903b8a2c38ee87aa8deb12ae9c87c6e54d0f Mon Sep 17 00:00:00 2001
From: Meyers-Im <bennetm@stanford.edu>
Date: Thu, 14 Mar 2024 15:06:51 -0700
Subject: [PATCH 08/17] can now set a degradation rate manually, or inherit it
 in the class from the MC estimate

---
 .../algorithms/loss_factor_analysis.py        | 33 ++++++++++++-------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/solardatatools/algorithms/loss_factor_analysis.py b/solardatatools/algorithms/loss_factor_analysis.py
index b69e829a..33b05c03 100644
--- a/solardatatools/algorithms/loss_factor_analysis.py
+++ b/solardatatools/algorithms/loss_factor_analysis.py
@@ -34,6 +34,7 @@ def __init__(
             self.use_ixs = np.logical_and(self.use_ixs, ~outage_flags)
         self.capacity_change_labels = capacity_change_labels
         self.total_measured_energy = np.sum(self.energy_data[self.use_ixs])
+        self.MC_results = None
         self.problem = self.make_problem(**kwargs)
         self.user_settings = kwargs
         self.degradation_rate = None
@@ -115,6 +116,15 @@ def estimate_degradation_rate(
             ]
             self.use_ixs = old_ixs
             counter += 1
+            diffs = np.diff(running_stats, axis=0)
+            if verbose and (counter + 1) % 10 == 0:
+                vn = running_stats.values[-1]
+                progress.write(
+                    f"P50, P02.5, P97.5: {vn[0]:.3f}, {vn[1]:.3f}, {vn[2]:.3f}"
+                )
+                progress.write(
+                    f"changes: {diffs[-1][0]:.3e}, {diffs[-1][1]:.3e}, {diffs[-1][2]:.3e}"
+                )
             # check exit conditions
             if counter < 10:
                 # get at least 10 samples
@@ -122,7 +132,7 @@ def estimate_degradation_rate(
             elif counter > max_samples:
                 # don't go over max_samples
                 break
-            diffs = np.diff(running_stats, axis=0)
+
             change_is_small_now = np.all(
                 np.abs(diffs[-1]) <= [median_tol, confidence_tol, confidence_tol]
             )
@@ -130,16 +140,9 @@ def estimate_degradation_rate(
                 np.average(np.abs(diffs[-10:]), axis=0)
                 <= [median_tol, confidence_tol, confidence_tol]
             )
-            if verbose and counter % 5 == 0:
-                vn = running_stats.values[-1]
-                progress.write(
-                    f"P50, P02.5, P97.5: {vn[0]:.3f}, {vn[1]:.3f}, {vn[2]:.3f}"
-                )
-                progress.write(
-                    f"changes: {diffs[-1][0]:.3e}, {diffs[-1][1]:.3e}, {diffs[-1][2]:.3e}"
-                )
+        self.degradation_rate = np.median(output["deg"])
+        self.MC_results = {"samples": output, "running stats": running_stats}
         self.problem = self.make_problem(**self.user_settings)
-        return output, running_stats
 
     def estimate_losses(self, solver="CLARABEL"):
         self.problem.decompose(solver=solver, verbose=False)
@@ -207,7 +210,12 @@ def make_problem(
         weight_soiling_stiffness=1e0,
         weight_soiling_sparsity=1e-2,
         weight_deg_nonlinear=10e4,
+        deg_rate=None,
     ):
+        # Inherit degradation rate if Monte Carlo sampling has been conducted, but only if the user doesn't pass their
+        # own rate
+        if deg_rate is None and self.MC_results is not None:
+            deg_rate = self.degradation_rate
         # Pinball loss noise
         c1 = comp.SumQuantile(tau=tau)
         # Smooth periodic term
@@ -232,7 +240,10 @@ def make_problem(
             c3 = comp.Aggregate([comp.NoSlope(), comp.FirstValEqual(value=0)])
         # Degradation term
         if deg_type == "linear":
-            c4 = comp.Aggregate([comp.NoCurvature(), comp.FirstValEqual(value=0)])
+            atom_list = [comp.NoCurvature(), comp.FirstValEqual(value=0)]
+            if deg_rate is not None:
+                atom_list.append(comp.FirstValEqual(value=deg_rate / 100 / 365, diff=1))
+            c4 = comp.Aggregate(atom_list)
         elif deg_type == "nonlinear":
             n_tot = length
             n_reduce = int(0.9 * n_tot)

From 674634e175fdbc30dfa2a19e9458c11e9f3d0bfe Mon Sep 17 00:00:00 2001
From: Meyers-Im <bennetm@stanford.edu>
Date: Thu, 14 Mar 2024 22:41:24 -0700
Subject: [PATCH 09/17] reorganize and add better class for setting the
 component equal to known values

---
 .../algorithms/loss_factor_analysis.py        | 215 ++++++++++++------
 solardatatools/data_handler.py                | 209 ++++++++++-------
 2 files changed, 275 insertions(+), 149 deletions(-)

diff --git a/solardatatools/algorithms/loss_factor_analysis.py b/solardatatools/algorithms/loss_factor_analysis.py
index 33b05c03..0723d4d9 100644
--- a/solardatatools/algorithms/loss_factor_analysis.py
+++ b/solardatatools/algorithms/loss_factor_analysis.py
@@ -15,6 +15,7 @@
 import matplotlib.pyplot as plt
 from gfosd import Problem
 import gfosd.components as comp
+from gfosd.components.base_graph_class import GraphComponent
 from spcqe.functions import make_basis_matrix, make_regularization_matrix
 from tqdm import tqdm
 
@@ -144,8 +145,8 @@ def estimate_degradation_rate(
         self.MC_results = {"samples": output, "running stats": running_stats}
         self.problem = self.make_problem(**self.user_settings)
 
-    def estimate_losses(self, solver="CLARABEL"):
-        self.problem.decompose(solver=solver, verbose=False)
+    def estimate_losses(self, solver="CLARABEL", verbose=False):
+        self.problem.decompose(solver=solver, verbose=verbose)
         # in the SD formulation, we put the residual term first, so it's the reverse order of how we specify this model (weather last)
         self.log_energy_model = self.problem.decomposition[::-1]
         self.energy_model = np.exp(self.log_energy_model)
@@ -195,6 +196,122 @@ def report(self):
             }
             return out
 
+    def plot_pie(self):
+        plt.pie(
+            [
+                np.clip(-self.degradation_energy_loss, 0, np.inf),
+                np.clip(-self.soiling_energy_loss, 0, np.inf),
+                np.clip(-self.capacity_change_loss, 0, np.inf),
+                np.clip(-self.weather_energy_loss, 0, np.inf),
+                np.clip(-self.outage_energy_loss, 0, np.inf),
+            ],
+            labels=[
+                "degradation",
+                "soiling",
+                "capacity change",
+                "weather",
+                "outages",
+            ],
+            autopct="%1.1f%%",
+        )
+        plt.title("System loss breakdown")
+        return plt.gcf()
+
+    def plot_waterfall(self):
+        index = [
+            "baseline",
+            "weather",
+            "outages",
+            "capacity changes",
+            "soiling",
+            "degradation",
+        ]
+        bl = np.sum(self.energy_model[0])
+        data = {
+            "amount": [
+                bl,
+                self.weather_energy_loss,
+                self.outage_energy_loss,
+                self.capacity_change_loss,
+                self.soiling_energy_loss,
+                self.degradation_energy_loss,
+            ]
+        }
+        fig = waterfall_plot(data, index)
+        return fig
+
+    def plot_decomposition(self, figsize=(16, 8.5)):
+        _fig_decomp = self.problem.plot_decomposition(
+            exponentiate=True, figsize=figsize
+        )
+        _ax = _fig_decomp.axes
+        _ax[0].plot(
+            np.arange(len(self.energy_data))[~self.use_ixs],
+            self.energy_model[-1, ~self.use_ixs],
+            color="red",
+            marker=".",
+            ls="none",
+        )
+        _ax[0].set_title("weather and system outages")
+        _ax[1].set_title("capacity changes")
+        _ax[2].set_title("soiling")
+        _ax[3].set_title("degradation")
+        _ax[4].set_title("baseline")
+        _ax[5].set_title("measured energy (green) and model minus weather")
+        plt.tight_layout()
+        return _fig_decomp
+
+    def plot_mc_histogram(self, figsize=None, title=None):
+        if self.MC_results is not None:
+            if figsize is not None:
+                fig = plt.figure(figsize=figsize)
+            degs = self.MC_results["samples"]["deg"]
+            n, bins, patches = plt.hist(degs)
+            plt.axvline(np.average(degs), color="yellow", label="mean")
+            plt.axvline(np.median(degs), color="orange", label="median")
+            mode_index = n.argmax()
+            plt.axvline(
+                bins[mode_index] + np.diff(bins)[0] / 2, color="red", label="mode"
+            )
+            plt.axvline(np.quantile(degs, 0.025), color="gray", ls=":")
+            plt.axvline(
+                np.quantile(degs, 0.975), color="gray", ls=":", label="95% confidence"
+            )
+            plt.legend()
+            if title is not None:
+                plt.title(title)
+            return plt.gcf()
+
+    def plot_mc_by_tau(self, figsize=None, title=None):
+        if self.MC_results is not None:
+            if figsize is not None:
+                fig = plt.figure(figsize=figsize)
+            degs = self.MC_results["samples"]["deg"]
+            taus = self.MC_results["samples"]["tau"]
+            weights = self.MC_results["samples"]["weight"]
+            plt.scatter(taus, degs, c=weights, cmap="plasma")
+            plt.colorbar(label="soiling stiffness [1]")
+            plt.xlabel("quantile level, tau [1]")
+            plt.ylabel("degradation rate estimate [%/yr]")
+            if title is not None:
+                plt.title(title)
+            return plt.gcf()
+
+    def plot_mc_by_weight(self, figsize=None, title=None):
+        if self.MC_results is not None:
+            if figsize is not None:
+                fig = plt.figure(figsize=figsize)
+            degs = self.MC_results["samples"]["deg"]
+            taus = self.MC_results["samples"]["tau"]
+            weights = self.MC_results["samples"]["weight"]
+            plt.scatter(weights, degs, c=taus, cmap="plasma")
+            plt.colorbar(label="quantile level, tau [1]")
+            plt.xlabel("soiling stiffness [1]")
+            plt.ylabel("degradation rate estimate [%/yr]")
+            if title is not None:
+                plt.title(title)
+            return plt.gcf()
+
     def holdout_validate(self, seed=None, solver="CLARABEL"):
         residual, test_ix = self.problem.holdout_decompose(seed=seed, solver=solver)
         error_metric = np.sum(np.abs(residual))
@@ -240,10 +357,11 @@ def make_problem(
             c3 = comp.Aggregate([comp.NoSlope(), comp.FirstValEqual(value=0)])
         # Degradation term
         if deg_type == "linear":
-            atom_list = [comp.NoCurvature(), comp.FirstValEqual(value=0)]
-            if deg_rate is not None:
-                atom_list.append(comp.FirstValEqual(value=deg_rate / 100 / 365, diff=1))
-            c4 = comp.Aggregate(atom_list)
+            if deg_rate is None:
+                c4 = comp.Aggregate([comp.NoCurvature(), comp.FirstValEqual(value=0)])
+            else:
+                val = np.cumsum(np.r_[[0], np.ones(length - 1) * deg_rate / 100 / 365])
+                c4 = SetEqual(val=val)
         elif deg_type == "nonlinear":
             n_tot = length
             n_reduce = int(0.9 * n_tot)
@@ -260,7 +378,7 @@ def make_problem(
                 ]
             )
         elif deg_select.value == "none":
-            c4 = comp.Aggregate([comp.NoSlope(), comp.FirstValEqual(value=0)])
+            c4 = SetEqual(val=np.zeros(length))
         # capacity change term — leverage previous analysis from SDT pipeline
         if self.capacity_change_labels is not None:
             basis_M = np.zeros((length, len(set(self.capacity_change_labels))))
@@ -275,76 +393,11 @@ def make_problem(
                 ]
             )
         else:
-            c5 = comp.Aggregate([comp.NoSlope(), comp.FirstValEqual(value=0)])
+            c5 = SetEqual(val=np.zeros(length))
 
         prob = Problem(self.log_energy, [c1, c5, c3, c4, c2], use_set=self.use_ixs)
         return prob
 
-    def plot_pie(self):
-        plt.pie(
-            [
-                np.clip(-self.degradation_energy_loss, 0, np.inf),
-                np.clip(-self.soiling_energy_loss, 0, np.inf),
-                np.clip(-self.capacity_change_loss, 0, np.inf),
-                np.clip(-self.weather_energy_loss, 0, np.inf),
-                np.clip(-self.outage_energy_loss, 0, np.inf),
-            ],
-            labels=[
-                "degradation",
-                "soiling",
-                "capacity change",
-                "weather",
-                "outages",
-            ],
-            autopct="%1.1f%%",
-        )
-        plt.title("System loss breakdown")
-        return plt.gcf()
-
-    def plot_waterfall(self):
-        index = [
-            "baseline",
-            "weather",
-            "outages",
-            "capacity changes",
-            "soiling",
-            "degradation",
-        ]
-        bl = np.sum(self.energy_model[0])
-        data = {
-            "amount": [
-                bl,
-                self.weather_energy_loss,
-                self.outage_energy_loss,
-                self.capacity_change_loss,
-                self.soiling_energy_loss,
-                self.degradation_energy_loss,
-            ]
-        }
-        fig = waterfall_plot(data, index)
-        return fig
-
-    def plot_decomposition(self, figsize=(16, 8.5)):
-        _fig_decomp = self.problem.plot_decomposition(
-            exponentiate=True, figsize=figsize
-        )
-        _ax = _fig_decomp.axes
-        _ax[0].plot(
-            np.arange(len(self.energy_data))[~self.use_ixs],
-            self.energy_model[-1, ~self.use_ixs],
-            color="red",
-            marker=".",
-            ls="none",
-        )
-        _ax[0].set_title("weather and system outages")
-        _ax[1].set_title("capacity changes")
-        _ax[2].set_title("soiling")
-        _ax[3].set_title("degradation")
-        _ax[4].set_title("baseline")
-        _ax[5].set_title("measured energy (green) and model minus weather")
-        plt.tight_layout()
-        return _fig_decomp
-
 
 def model_wrapper(energy_model, use_ixs):
     n = energy_model.shape[0]
@@ -509,3 +562,19 @@ def waterfall_plot(data, index, figsize=(10, 4)):
     fig = my_plot.get_figure()
     fig.set_layout_engine(layout="tight")
     return fig
+
+
+class SetEqual(GraphComponent):
+    def __init__(self, val, *args, **kwargs):
+        super().__init__(diff=0, *args, **kwargs)
+        self._has_helpers = True
+        self._val = val
+
+    def _set_z_size(self):
+        self._z_size = 0
+
+    def _make_A(self):
+        super()._make_A()
+
+    def _make_c(self):
+        self._c = self._val
diff --git a/solardatatools/data_handler.py b/solardatatools/data_handler.py
index dd1bb143..c744b5b5 100644
--- a/solardatatools/data_handler.py
+++ b/solardatatools/data_handler.py
@@ -37,6 +37,7 @@
     TimeShift,
     SunriseSunset,
     ClippingDetection,
+    LossFactorAnalysis,
 )
 from pandas.plotting import register_matplotlib_converters
 
@@ -153,6 +154,7 @@ def _initialize_attributes(self):
         self.clear_day_analysis = None
         self.parameter_estimation = None
         self.polar_transform = None
+        self.loss_analysis = None
         # Private attributes
         self._ran_pipeline = False
         self._error_msg = ""
@@ -617,6 +619,137 @@ def report(self, verbose=True, return_values=False):
         else:
             return
 
+    def fix_dst(self):
+        """
+        Helper function for fixing data sets with known DST shift. This function
+        works for data recorded anywhere in the United States. The choice of
+        timezone (e.g. 'US/Pacific') does not matter, as long as the dates
+        of the clock changes are the same.
+        :return:
+        """
+        if not self.__fix_dst_ran:
+            df = self.data_frame_raw
+            df_localized = df.tz_localize(
+                "US/Pacific", ambiguous="NaT", nonexistent="NaT"
+            )
+            df_localized = df_localized[df_localized.index == df_localized.index]
+            df_localized = df_localized.tz_convert("Etc/GMT+8")
+            df_localized = df_localized.tz_localize(None)
+            self.data_frame_raw = df_localized
+            self.__fix_dst_ran = True
+            return
+        else:
+            print("DST correction already performed on this data set.")
+            return
+
+    def run_loss_factor_analysis(
+        self,
+        verbose=True,
+        tau=0.9,
+        num_harmonics=4,
+        deg_type="linear",
+        include_soiling=True,
+        weight_seasonal=10e-2,
+        weight_soiling_stiffness=1e0,
+        weight_soiling_sparsity=1e-2,
+        weight_deg_nonlinear=10e4,
+        deg_rate=None,
+    ):
+        """
+
+        :return:
+        """
+        self.loss_analysis = LossFactorAnalysis(
+            self.daily_signals.energy,
+            capacity_change_labels=self.capacity_analysis.labels,
+            outage_flags=~self.daily_flags.no_errors,
+            tau=tau,
+            num_harmonics=num_harmonics,
+            deg_type=deg_type,
+            include_soiling=include_soiling,
+            weight_seasonal=weight_seasonal,
+            weight_soiling_stiffness=weight_soiling_stiffness,
+            weight_soiling_sparsity=weight_soiling_sparsity,
+            weight_deg_nonlinear=weight_deg_nonlinear,
+            deg_rate=deg_rate,
+        )
+        if deg_rate is None:
+            self.loss_analysis.estimate_degradation_rate(verbose=verbose)
+        elif verbose:
+            print("Loading user-provided degradation rate.")
+        if verbose:
+            print("Performing loss factor analysis...")
+        self.loss_analysis.estimate_losses()
+        if verbose:
+            print(
+                f"""
+            ***************************************
+            * Solar Data Tools Loss Factor Report *
+            ***************************************
+
+            degradation rate [%/yr]:           {self.loss_analysis.degradation_rate:.3f},
+            total energy loss [kWh]:           {self.loss_analysis.total_energy_loss:.1f},
+            degradation energy loss [kWh]:     {self.loss_analysis.degradation_energy_loss:.1f},
+            soiling energy loss [kWh]:         {self.loss_analysis.soiling_energy_loss:.1f},
+            capacity change energy loss [kWh]: {self.loss_analysis.capacity_change_loss:.1f},
+            weather energy loss [kWh]:         {self.loss_analysis.weather_energy_loss:.1f},
+            system outage loss [kWh]:          {self.loss_analysis.outage_energy_loss:.1f},
+            """
+            )
+
+    def fit_statistical_clear_sky_model(
+        self,
+        data_matrix=None,
+        rank=6,
+        mu_l=None,
+        mu_r=None,
+        tau=None,
+        exit_criterion_epsilon=1e-3,
+        solver_type="MOSEK",
+        max_iteration=10,
+        calculate_degradation=True,
+        max_degradation=None,
+        min_degradation=None,
+        non_neg_constraints=False,
+        verbose=True,
+        bootstraps=None,
+    ):
+        try:
+            from statistical_clear_sky import SCSF
+        except ImportError:
+            print("Please install statistical-clear-sky package")
+            return
+        scsf = SCSF(
+            data_handler_obj=self,
+            data_matrix=data_matrix,
+            rank_k=rank,
+            solver_type=solver_type,
+        )
+        scsf.execute(
+            mu_l=mu_l,
+            mu_r=mu_r,
+            tau=tau,
+            exit_criterion_epsilon=exit_criterion_epsilon,
+            max_iteration=max_iteration,
+            is_degradation_calculated=calculate_degradation,
+            max_degradation=max_degradation,
+            min_degradation=min_degradation,
+            non_neg_constraints=non_neg_constraints,
+            verbose=verbose,
+            bootstraps=bootstraps,
+        )
+        self.scsf = scsf
+
+    def calculate_scsf_performance_index(self):
+        if self.scsf is None:
+            print("No SCSF model detected. Fitting now...")
+            self.fit_statistical_clear_sky_model()
+        clear = self.scsf.estimated_power_matrix
+        clear_energy = np.sum(clear, axis=0)
+        measured_energy = np.sum(self.filled_data_matrix, axis=0)
+        pi = np.divide(measured_energy, clear_energy)
+        return pi
+
     def augment_data_frame(self, boolean_index, column_name):
         """
         Add a column to the data frame (tabular) representation of the data,
@@ -688,29 +821,6 @@ def augment_data_frame(self, boolean_index, column_name):
         self.data_frame_raw.index = old_index
         self.data_frame_raw[self.seq_index_key] = old_col
 
-    def fix_dst(self):
-        """
-        Helper function for fixing data sets with known DST shift. This function
-        works for data recorded anywhere in the United States. The choice of
-        timezone (e.g. 'US/Pacific') does not matter, as long as the dates
-        of the clock changes are the same.
-        :return:
-        """
-        if not self.__fix_dst_ran:
-            df = self.data_frame_raw
-            df_localized = df.tz_localize(
-                "US/Pacific", ambiguous="NaT", nonexistent="NaT"
-            )
-            df_localized = df_localized[df_localized.index == df_localized.index]
-            df_localized = df_localized.tz_convert("Etc/GMT+8")
-            df_localized = df_localized.tz_localize(None)
-            self.data_frame_raw = df_localized
-            self.__fix_dst_ran = True
-            return
-        else:
-            print("DST correction already performed on this data set.")
-            return
-
     def make_data_matrix(self, use_col=None, start_day_ix=None, end_day_ix=None):
         df = self.data_frame
         if use_col is None:
@@ -1060,59 +1170,6 @@ def find_clear_times(
         )
         self.boolean_masks.clear_times = clear_times
 
-    def fit_statistical_clear_sky_model(
-        self,
-        data_matrix=None,
-        rank=6,
-        mu_l=None,
-        mu_r=None,
-        tau=None,
-        exit_criterion_epsilon=1e-3,
-        solver_type="MOSEK",
-        max_iteration=10,
-        calculate_degradation=True,
-        max_degradation=None,
-        min_degradation=None,
-        non_neg_constraints=False,
-        verbose=True,
-        bootstraps=None,
-    ):
-        try:
-            from statistical_clear_sky import SCSF
-        except ImportError:
-            print("Please install statistical-clear-sky package")
-            return
-        scsf = SCSF(
-            data_handler_obj=self,
-            data_matrix=data_matrix,
-            rank_k=rank,
-            solver_type=solver_type,
-        )
-        scsf.execute(
-            mu_l=mu_l,
-            mu_r=mu_r,
-            tau=tau,
-            exit_criterion_epsilon=exit_criterion_epsilon,
-            max_iteration=max_iteration,
-            is_degradation_calculated=calculate_degradation,
-            max_degradation=max_degradation,
-            min_degradation=min_degradation,
-            non_neg_constraints=non_neg_constraints,
-            verbose=verbose,
-            bootstraps=bootstraps,
-        )
-        self.scsf = scsf
-
-    def calculate_scsf_performance_index(self):
-        if self.scsf is None:
-            print("No SCSF model detected. Fitting now...")
-            self.fit_statistical_clear_sky_model()
-        clear = self.scsf.estimated_power_matrix
-        clear_energy = np.sum(clear, axis=0)
-        measured_energy = np.sum(self.filled_data_matrix, axis=0)
-        pi = np.divide(measured_energy, clear_energy)
-        return pi
-
     def setup_location_and_orientation_estimation(
         self,
         gmt_offset,

From 7f04ea900f1320dd7f905b5230fc6c61676c9b3b Mon Sep 17 00:00:00 2001
From: Meyers-Im <bennetm@stanford.edu>
Date: Thu, 14 Mar 2024 23:52:56 -0700
Subject: [PATCH 10/17] different prints if MC is run or not

---
 solardatatools/data_handler.py | 50 ++++++++++++++++++++++++----------
 1 file changed, 35 insertions(+), 15 deletions(-)

diff --git a/solardatatools/data_handler.py b/solardatatools/data_handler.py
index c744b5b5..747f942c 100644
--- a/solardatatools/data_handler.py
+++ b/solardatatools/data_handler.py
@@ -681,21 +681,41 @@ def run_loss_factor_analysis(
             print("Performing loss factor analysis...")
         self.loss_analysis.estimate_losses()
         if verbose:
-            print(
-                f"""
-            ***************************************
-            * Solar Data Tools Loss Factor Report *
-            ***************************************
-
-            degradation rate [%/yr]:           {self.loss_analysis.degradation_rate:.3f},
-            total energy loss [kWh]:           {self.loss_analysis.total_energy_loss:.1f},
-            degradation energy loss [kWh]:     {self.loss_analysis.degradation_energy_loss:.1f},
-            soiling energy loss [kWh]:         {self.loss_analysis.soiling_energy_loss:.1f},
-            capacity change energy loss [kWh]: {self.loss_analysis.capacity_change_loss:.1f},
-            weather energy loss [kWh]:         {self.loss_analysis.weather_energy_loss:.1f},
-            system outage loss [kWh]:          {self.loss_analysis.outage_energy_loss:.1f},
-            """
-            )
+            lb = self.loss_analysis.degradation_rate_lb
+            ub = self.loss_analysis.degradation_rate_ub
+            if lb is not None and ub is not None:
+                print(
+                    f"""
+                ***************************************
+                * Solar Data Tools Loss Factor Report *
+                ***************************************
+
+                degradation rate [%/yr]:                    {self.loss_analysis.degradation_rate:.3f}
+                deg. rate 95% confidence:          [{lb:>6.3f}, {ub:>6.3f}]
+                total energy loss [kWh]:             {self.loss_analysis.total_energy_loss:>13.1f}
+                bulk deg. energy loss (gain) [kWh]:  {self.loss_analysis.degradation_energy_loss:>13.1f}
+                soiling energy loss [kWh]:           {self.loss_analysis.soiling_energy_loss:>13.1f}
+                capacity change energy loss [kWh]:   {self.loss_analysis.capacity_change_loss:>13.1f}
+                weather energy loss [kWh]:           {self.loss_analysis.weather_energy_loss:>13.1f}
+                system outage loss [kWh]:            {self.loss_analysis.outage_energy_loss:>13.1f}
+                """
+                )
+            else:
+                print(
+                    f"""
+                ***************************************
+                * Solar Data Tools Loss Factor Report *
+                ***************************************
+
+                degradation rate [%/yr]:                    {self.loss_analysis.degradation_rate:.3f}
+                total energy loss [kWh]:             {self.loss_analysis.total_energy_loss:>13.1f}
+                bulk deg. energy loss (gain) [kWh]:  {self.loss_analysis.degradation_energy_loss:>13.1f}
+                soiling energy loss [kWh]:           {self.loss_analysis.soiling_energy_loss:>13.1f}
+                capacity change energy loss [kWh]:   {self.loss_analysis.capacity_change_loss:>13.1f}
+                weather energy loss [kWh]:           {self.loss_analysis.weather_energy_loss:>13.1f}
+                system outage loss [kWh]:            {self.loss_analysis.outage_energy_loss:>13.1f}
+                """
+                )
 
     def fit_statistical_clear_sky_model(
         self,

From 36e104a3ad2db11dbd0e8cb4bd73d837c1812daa Mon Sep 17 00:00:00 2001
From: Meyers-Im <bennetm@stanford.edu>
Date: Thu, 14 Mar 2024 23:54:01 -0700
Subject: [PATCH 11/17] report upper and lower bounds

---
 solardatatools/algorithms/loss_factor_analysis.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/solardatatools/algorithms/loss_factor_analysis.py b/solardatatools/algorithms/loss_factor_analysis.py
index 0723d4d9..270a2fed 100644
--- a/solardatatools/algorithms/loss_factor_analysis.py
+++ b/solardatatools/algorithms/loss_factor_analysis.py
@@ -39,6 +39,8 @@ def __init__(
         self.problem = self.make_problem(**kwargs)
         self.user_settings = kwargs
         self.degradation_rate = None
+        self.degradation_rate_lb = None
+        self.degradation_rate_ub = None
         self.energy_model = None
         self.log_energy_model = None
         self.total_energy_loss = None
@@ -142,6 +144,8 @@ def estimate_degradation_rate(
                 <= [median_tol, confidence_tol, confidence_tol]
             )
         self.degradation_rate = np.median(output["deg"])
+        self.degradation_rate_lb = np.quantile(output["deg"], 0.025, method=method)
+        self.degradation_rate_ub = np.quantile(output["deg"], 0.975, method=method)
         self.MC_results = {"samples": output, "running stats": running_stats}
         self.problem = self.make_problem(**self.user_settings)
 
@@ -187,6 +191,8 @@ def report(self):
         if self.total_energy_loss is not None:
             out = {
                 "degradation rate [%/yr]": self.degradation_rate,
+                "deg rate lower bound [%/yr]": self.degradation_rate_lb,
+                "deg rate upper bound [%/yr]": self.degradation_rate_ub,
                 "total energy loss [kWh]": self.total_energy_loss,
                 "degradation energy loss [kWh]": self.degradation_energy_loss,
                 "soiling energy loss [kWh]": self.soiling_energy_loss,

From 570dab4daeec1dd137a2fd83ebaa370702a9f7d9 Mon Sep 17 00:00:00 2001
From: Meyers-Im <bennetm@stanford.edu>
Date: Fri, 15 Mar 2024 09:41:07 -0700
Subject: [PATCH 12/17] docstrings and cleanup

---
 .../algorithms/loss_factor_analysis.py        | 112 ++++++++++++++++--
 solardatatools/data_handler.py                |   6 +-
 2 files changed, 108 insertions(+), 10 deletions(-)

diff --git a/solardatatools/algorithms/loss_factor_analysis.py b/solardatatools/algorithms/loss_factor_analysis.py
index 270a2fed..151c876a 100644
--- a/solardatatools/algorithms/loss_factor_analysis.py
+++ b/solardatatools/algorithms/loss_factor_analysis.py
@@ -24,6 +24,17 @@ class LossFactorAnalysis:
     def __init__(
         self, energy_data, capacity_change_labels=None, outage_flags=None, **kwargs
     ):
+        """
+        A class for analyzing loss factors, including the degradation rate, of a PV system
+
+        :param energy_data: a discrete time series of daily integrated energy
+        :type energy_data: numpy 1D array
+        :param capacity_change_labels: labeling of known capacity changes, e.g. from the CapacityChange tool
+        :type  capacity_change_labels: 1D array of integer labels, with same length as energy_data
+        :param outage_flags: labeling of days with known operational issues, e.g. from DataHandle pipeline
+        :type outage_flags: 1D array of integer labels, with same length as energy_data
+        :param kwargs: arguments to be passed to to the make_problem method
+        """
         self.energy_data = energy_data
         log_energy = np.zeros_like(self.energy_data)
         is_zero = np.isclose(energy_data, 0, atol=1e-1)
@@ -60,13 +71,31 @@ def __init__(
     def estimate_degradation_rate(
         self,
         max_samples=500,
-        median_tol=1e-3,
+        median_tol=5e-3,
         confidence_tol=1e-2,
         fraction_hold=0.2,
         method="median_unbiased",
         verbose=False,
-        debug=False,
     ):
+        """
+        This function runs a Monte Carlo simulation to estimate the uncertainty in the estimation of the degrdation rate
+        based on the loss model. This will randomly sample problem parameters (quantile level and soiling stiffness
+        weight), while randomly holding out 20% of the days each time. The algorithm exits when the estimates of the
+        median, 2.5 percentile, and 97.5 percentile have stabilized. Results are stored in the following class
+        attributes:
+            self.degradation_rate
+            self.degradation_rate_lb
+            self.degradation_rate_ub
+            self.MC_results
+
+        :param max_samples: maximimum number of MC samples to generate (typically exits before this)
+        :param median_tol: tolerance for median estimate stability
+        :param confidence_tol: tolerance for outer percentile estimate stability
+        :param fraction_hold: fraction of values to holdout in each sample
+        :param method: quantile estimation method (see: https://numpy.org/doc/stable/reference/generated/numpy.quantile.html)
+        :param verbose: control print statements
+        :return: None
+        """
         old_ixs = np.copy(self.use_ixs)
         change_is_small_now = False
         change_is_small_window = False
@@ -188,6 +217,10 @@ def estimate_losses(self, solver="CLARABEL", verbose=False):
         return
 
     def report(self):
+        """
+        Creates a machine-readible dictionary of result from the loss factor analysis
+        :return: dictionary
+        """
         if self.total_energy_loss is not None:
             out = {
                 "degradation rate [%/yr]": self.degradation_rate,
@@ -203,6 +236,11 @@ def report(self):
             return out
 
     def plot_pie(self):
+        """
+        Create a pie plot of losses
+
+        :return: matplotlib figure
+        """
         plt.pie(
             [
                 np.clip(-self.degradation_energy_loss, 0, np.inf),
@@ -224,6 +262,11 @@ def plot_pie(self):
         return plt.gcf()
 
     def plot_waterfall(self):
+        """
+        Create a waterfall plot of losses
+
+        :return: matplotlib figure
+        """
         index = [
             "baseline",
             "weather",
@@ -247,6 +290,12 @@ def plot_waterfall(self):
         return fig
 
     def plot_decomposition(self, figsize=(16, 8.5)):
+        """
+        Creates a figure with subplots illustrating the estimated signal components found through decomposition
+
+        :param figsize: size of figure (tuple)
+        :return: matplotlib figure
+        """
         _fig_decomp = self.problem.plot_decomposition(
             exponentiate=True, figsize=figsize
         )
@@ -268,6 +317,14 @@ def plot_decomposition(self, figsize=(16, 8.5)):
         return _fig_decomp
 
     def plot_mc_histogram(self, figsize=None, title=None):
+        """
+        Creates a historgram of the Monte Carlo samples and annotates the chart with mean, median, mode, and confidence
+        intervals.
+
+        :param figsize: size of figure (tuple)
+        :param title: title for figure (string)
+        :return: matplotlib figure
+        """
         if self.MC_results is not None:
             if figsize is not None:
                 fig = plt.figure(figsize=figsize)
@@ -289,6 +346,15 @@ def plot_mc_histogram(self, figsize=None, title=None):
             return plt.gcf()
 
     def plot_mc_by_tau(self, figsize=None, title=None):
+        """
+        Creates a scatterplot of the Monte Carlo samples versus tau (quantile level) and colors the points by the
+        weight of the soiling stiffness term
+
+
+        :param figsize: size of figure (tuple)
+        :param title: title for figure (string)
+        :return: matplotlib figure
+        """
         if self.MC_results is not None:
             if figsize is not None:
                 fig = plt.figure(figsize=figsize)
@@ -304,6 +370,15 @@ def plot_mc_by_tau(self, figsize=None, title=None):
             return plt.gcf()
 
     def plot_mc_by_weight(self, figsize=None, title=None):
+        """
+        Creates a scatterplot of the Monte Carlo samples versus weight (soiling stiffness) and colors the points by the
+        tau (quantile level)
+
+
+        :param figsize: size of figure (tuple)
+        :param title: title for figure (string)
+        :return: matplotlib figure
+        """
         if self.MC_results is not None:
             if figsize is not None:
                 fig = plt.figure(figsize=figsize)
@@ -318,11 +393,6 @@ def plot_mc_by_weight(self, figsize=None, title=None):
                 plt.title(title)
             return plt.gcf()
 
-    def holdout_validate(self, seed=None, solver="CLARABEL"):
-        residual, test_ix = self.problem.holdout_decompose(seed=seed, solver=solver)
-        error_metric = np.sum(np.abs(residual))
-        return error_metric
-
     def make_problem(
         self,
         tau=0.9,
@@ -335,6 +405,29 @@ def make_problem(
         weight_deg_nonlinear=10e4,
         deg_rate=None,
     ):
+        """
+        Constuct the signal decomposition problem for estimation of loss factors in PV energy data.
+
+        :param tau: the quantile level to fit
+        :type tau: float
+        :param num_harmonics: the number of harmonics to include in model for yearly periodicity
+        :type num_harmonics: int
+        :param deg_type: the type of degradation to model ("linear", "nonlinear", or "none")
+        :type deg_type: str
+        :param include_soiling: whether to include a soiling term
+        :type include_soiling: bool
+        :param weight_seasonal: the weight on the seasonal penalty term (higher is stiffer)
+        :type weight_seasonal: float
+        :param weight_soiling_stiffness: the weight on the soiling stiffness (higher is stiffer)
+        :type weight_soiling_stiffness: float
+        :param weight_soiling_sparsity: the weight on the soiling stiffness (higher is sparser)
+        :type weight_soiling_sparsity: float
+        :param weight_deg_nonlinear: only used if 'nonlinear' degradation model is selected
+        :type weight_deg_nonlinear: float
+        :param deg_rate: pass to set a known degradation rate rather than have the SD problem estimate it
+        :type deg_rate: None or float [%/yr]
+        :return: a gfosd.Problem instance
+        """
         # Inherit degradation rate if Monte Carlo sampling has been conducted, but only if the user doesn't pass their
         # own rate
         if deg_rate is None and self.MC_results is not None:
@@ -404,6 +497,11 @@ def make_problem(
         prob = Problem(self.log_energy, [c1, c5, c3, c4, c2], use_set=self.use_ixs)
         return prob
 
+    def holdout_validate(self, seed=None, solver="CLARABEL"):
+        residual, test_ix = self.problem.holdout_decompose(seed=seed, solver=solver)
+        error_metric = np.sum(np.abs(residual))
+        return error_metric
+
 
 def model_wrapper(energy_model, use_ixs):
     n = energy_model.shape[0]
diff --git a/solardatatools/data_handler.py b/solardatatools/data_handler.py
index 747f942c..bfb82a2d 100644
--- a/solardatatools/data_handler.py
+++ b/solardatatools/data_handler.py
@@ -650,8 +650,8 @@ def run_loss_factor_analysis(
         deg_type="linear",
         include_soiling=True,
         weight_seasonal=10e-2,
-        weight_soiling_stiffness=1e0,
-        weight_soiling_sparsity=1e-2,
+        weight_soiling_stiffness=2e0,
+        weight_soiling_sparsity=2e-2,
         weight_deg_nonlinear=10e4,
         deg_rate=None,
     ):
@@ -690,7 +690,7 @@ def run_loss_factor_analysis(
                 * Solar Data Tools Loss Factor Report *
                 ***************************************
 
-                degradation rate [%/yr]:                    {self.loss_analysis.degradation_rate:.3f}
+                degradation rate [%/yr]:                    {self.loss_analysis.degradation_rate:>6.3f}
                 deg. rate 95% confidence:          [{lb:>6.3f}, {ub:>6.3f}]
                 total energy loss [kWh]:             {self.loss_analysis.total_energy_loss:>13.1f}
                 bulk deg. energy loss (gain) [kWh]:  {self.loss_analysis.degradation_energy_loss:>13.1f}

From 65502a19c528baba30c6011c879780342186ab36 Mon Sep 17 00:00:00 2001
From: Meyers-Im <bennetm@stanford.edu>
Date: Fri, 15 Mar 2024 09:44:52 -0700
Subject: [PATCH 13/17] update file description

---
 solardatatools/algorithms/loss_factor_analysis.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/solardatatools/algorithms/loss_factor_analysis.py b/solardatatools/algorithms/loss_factor_analysis.py
index 151c876a..468176ae 100644
--- a/solardatatools/algorithms/loss_factor_analysis.py
+++ b/solardatatools/algorithms/loss_factor_analysis.py
@@ -3,10 +3,12 @@
 This module is for estimation of degradation and soiling losses from unlabeled
 daily energy production data. Model is of the form
 
-y_t = x_t * d_t * s_t * w_t, for t = 1,...,T
+y_t = x_t * d_t * s_t * c_t * w_t, for t \in K
 
 where y_t [kWh] is the measured real daily energy on each day, x_t [kWh] is an ideal yearly baseline of performance,
-and d_t, s_t, and w_t are the loss factors for degradation, soiling, and weather respectively.
+and d_t, s_t, and w_t are the loss factors for degradation, soiling, capacity changes, and weather respectively. K is
+the set of "known" index values, e.g. the days that we have good energy production values for (not missing or
+corrupted).
 
 """
 

From e9bafd5ef38a499b99398ec092bf9cb9dd4d067f Mon Sep 17 00:00:00 2001
From: Meyers-Im <bennetm@stanford.edu>
Date: Fri, 15 Mar 2024 09:45:13 -0700
Subject: [PATCH 14/17] adding author

---
 solardatatools/algorithms/loss_factor_analysis.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/solardatatools/algorithms/loss_factor_analysis.py b/solardatatools/algorithms/loss_factor_analysis.py
index 468176ae..973d27fc 100644
--- a/solardatatools/algorithms/loss_factor_analysis.py
+++ b/solardatatools/algorithms/loss_factor_analysis.py
@@ -10,6 +10,7 @@
 the set of "known" index values, e.g. the days that we have good energy production values for (not missing or
 corrupted).
 
+Author: Bennet Meyers
 """
 
 import numpy as np

From 973bd586d57f0a46e7dce2f2f22009dd57a8e0c1 Mon Sep 17 00:00:00 2001
From: Meyers-Im <bennetm@stanford.edu>
Date: Fri, 15 Mar 2024 09:50:49 -0700
Subject: [PATCH 15/17] cleanup class init import

---
 solardatatools/algorithms/loss_factor_analysis.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/solardatatools/algorithms/loss_factor_analysis.py b/solardatatools/algorithms/loss_factor_analysis.py
index 973d27fc..2a71899a 100644
--- a/solardatatools/algorithms/loss_factor_analysis.py
+++ b/solardatatools/algorithms/loss_factor_analysis.py
@@ -39,6 +39,10 @@ def __init__(
         :param kwargs: arguments to be passed to to the make_problem method
         """
         self.energy_data = energy_data
+        if len(self.energy_data) <= 365:
+            print(
+                "WARNING: This technique is not designed for use on less than one year of data!"
+            )
         log_energy = np.zeros_like(self.energy_data)
         is_zero = np.isclose(energy_data, 0, atol=1e-1)
         log_energy[is_zero] = np.nan
@@ -63,7 +67,6 @@ def __init__(
         self.soiling_energy_loss = None
         self.capacity_change_loss = None
         self.weather_energy_loss = None
-        self.weather_percent_loss = None
         self.outage_energy_loss = None
         self.degradation_percent = None
         self.soiling_percent = None

From e8d5afc6f175e7e3874844dd4c51a6d0beba5c6c Mon Sep 17 00:00:00 2001
From: Meyers-Im <bennetm@stanford.edu>
Date: Fri, 15 Mar 2024 09:53:16 -0700
Subject: [PATCH 16/17] comments

---
 solardatatools/algorithms/loss_factor_analysis.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/solardatatools/algorithms/loss_factor_analysis.py b/solardatatools/algorithms/loss_factor_analysis.py
index 2a71899a..b03d28ac 100644
--- a/solardatatools/algorithms/loss_factor_analysis.py
+++ b/solardatatools/algorithms/loss_factor_analysis.py
@@ -438,6 +438,8 @@ def make_problem(
         # own rate
         if deg_rate is None and self.MC_results is not None:
             deg_rate = self.degradation_rate
+        ### Construct Losses ###
+        # NB: the order in which the lossses are defined here are *not* the order they are in the problem (see below)
         # Pinball loss noise
         c1 = comp.SumQuantile(tau=tau)
         # Smooth periodic term
@@ -499,7 +501,7 @@ def make_problem(
             )
         else:
             c5 = SetEqual(val=np.zeros(length))
-
+        # Component order: weather, capacity change, soiling, degradation, baseline
         prob = Problem(self.log_energy, [c1, c5, c3, c4, c2], use_set=self.use_ixs)
         return prob
 

From dd17e6238ee39d83fa21b64d62a51cf42e07c77a Mon Sep 17 00:00:00 2001
From: Meyers-Im <bennetm@stanford.edu>
Date: Fri, 15 Mar 2024 10:23:55 -0700
Subject: [PATCH 17/17] graceful handling if pipeline is not run

---
 solardatatools/data_handler.py | 117 +++++++++++++++++----------------
 1 file changed, 60 insertions(+), 57 deletions(-)

diff --git a/solardatatools/data_handler.py b/solardatatools/data_handler.py
index bfb82a2d..1dda3041 100644
--- a/solardatatools/data_handler.py
+++ b/solardatatools/data_handler.py
@@ -659,63 +659,66 @@ def run_loss_factor_analysis(
 
         :return:
         """
-        self.loss_analysis = LossFactorAnalysis(
-            self.daily_signals.energy,
-            capacity_change_labels=self.capacity_analysis.labels,
-            outage_flags=~self.daily_flags.no_errors,
-            tau=tau,
-            num_harmonics=num_harmonics,
-            deg_type=deg_type,
-            include_soiling=include_soiling,
-            weight_seasonal=weight_seasonal,
-            weight_soiling_stiffness=weight_soiling_stiffness,
-            weight_soiling_sparsity=weight_soiling_sparsity,
-            weight_deg_nonlinear=weight_deg_nonlinear,
-            deg_rate=deg_rate,
-        )
-        if deg_rate is None:
-            self.loss_analysis.estimate_degradation_rate(verbose=verbose)
-        elif verbose:
-            print("Loading user-provided degradation rate.")
-        if verbose:
-            print("Performing loss factor analysis...")
-        self.loss_analysis.estimate_losses()
-        if verbose:
-            lb = self.loss_analysis.degradation_rate_lb
-            ub = self.loss_analysis.degradation_rate_ub
-            if lb is not None and ub is not None:
-                print(
-                    f"""
-                ***************************************
-                * Solar Data Tools Loss Factor Report *
-                ***************************************
-
-                degradation rate [%/yr]:                    {self.loss_analysis.degradation_rate:>6.3f}
-                deg. rate 95% confidence:          [{lb:>6.3f}, {ub:>6.3f}]
-                total energy loss [kWh]:             {self.loss_analysis.total_energy_loss:>13.1f}
-                bulk deg. energy loss (gain) [kWh]:  {self.loss_analysis.degradation_energy_loss:>13.1f}
-                soiling energy loss [kWh]:           {self.loss_analysis.soiling_energy_loss:>13.1f}
-                capacity change energy loss [kWh]:   {self.loss_analysis.capacity_change_loss:>13.1f}
-                weather energy loss [kWh]:           {self.loss_analysis.weather_energy_loss:>13.1f}
-                system outage loss [kWh]:            {self.loss_analysis.outage_energy_loss:>13.1f}
-                """
-                )
-            else:
-                print(
-                    f"""
-                ***************************************
-                * Solar Data Tools Loss Factor Report *
-                ***************************************
-
-                degradation rate [%/yr]:                    {self.loss_analysis.degradation_rate:.3f}
-                total energy loss [kWh]:             {self.loss_analysis.total_energy_loss:>13.1f}
-                bulk deg. energy loss (gain) [kWh]:  {self.loss_analysis.degradation_energy_loss:>13.1f}
-                soiling energy loss [kWh]:           {self.loss_analysis.soiling_energy_loss:>13.1f}
-                capacity change energy loss [kWh]:   {self.loss_analysis.capacity_change_loss:>13.1f}
-                weather energy loss [kWh]:           {self.loss_analysis.weather_energy_loss:>13.1f}
-                system outage loss [kWh]:            {self.loss_analysis.outage_energy_loss:>13.1f}
-                """
-                )
+        if self._ran_pipeline:
+            self.loss_analysis = LossFactorAnalysis(
+                self.daily_signals.energy,
+                capacity_change_labels=self.capacity_analysis.labels,
+                outage_flags=~self.daily_flags.no_errors,
+                tau=tau,
+                num_harmonics=num_harmonics,
+                deg_type=deg_type,
+                include_soiling=include_soiling,
+                weight_seasonal=weight_seasonal,
+                weight_soiling_stiffness=weight_soiling_stiffness,
+                weight_soiling_sparsity=weight_soiling_sparsity,
+                weight_deg_nonlinear=weight_deg_nonlinear,
+                deg_rate=deg_rate,
+            )
+            if deg_rate is None:
+                self.loss_analysis.estimate_degradation_rate(verbose=verbose)
+            elif verbose:
+                print("Loading user-provided degradation rate.")
+            if verbose:
+                print("Performing loss factor analysis...")
+            self.loss_analysis.estimate_losses()
+            if verbose:
+                lb = self.loss_analysis.degradation_rate_lb
+                ub = self.loss_analysis.degradation_rate_ub
+                if lb is not None and ub is not None:
+                    print(
+                        f"""
+                    ***************************************
+                    * Solar Data Tools Loss Factor Report *
+                    ***************************************
+
+                    degradation rate [%/yr]:                    {self.loss_analysis.degradation_rate:>6.3f}
+                    deg. rate 95% confidence:          [{lb:>6.3f}, {ub:>6.3f}]
+                    total energy loss [kWh]:             {self.loss_analysis.total_energy_loss:>13.1f}
+                    bulk deg. energy loss (gain) [kWh]:  {self.loss_analysis.degradation_energy_loss:>13.1f}
+                    soiling energy loss [kWh]:           {self.loss_analysis.soiling_energy_loss:>13.1f}
+                    capacity change energy loss [kWh]:   {self.loss_analysis.capacity_change_loss:>13.1f}
+                    weather energy loss [kWh]:           {self.loss_analysis.weather_energy_loss:>13.1f}
+                    system outage loss [kWh]:            {self.loss_analysis.outage_energy_loss:>13.1f}
+                    """
+                    )
+                else:
+                    print(
+                        f"""
+                    ***************************************
+                    * Solar Data Tools Loss Factor Report *
+                    ***************************************
+
+                    degradation rate [%/yr]:                    {self.loss_analysis.degradation_rate:.3f}
+                    total energy loss [kWh]:             {self.loss_analysis.total_energy_loss:>13.1f}
+                    bulk deg. energy loss (gain) [kWh]:  {self.loss_analysis.degradation_energy_loss:>13.1f}
+                    soiling energy loss [kWh]:           {self.loss_analysis.soiling_energy_loss:>13.1f}
+                    capacity change energy loss [kWh]:   {self.loss_analysis.capacity_change_loss:>13.1f}
+                    weather energy loss [kWh]:           {self.loss_analysis.weather_energy_loss:>13.1f}
+                    system outage loss [kWh]:            {self.loss_analysis.outage_energy_loss:>13.1f}
+                    """
+                    )
+        else:
+            print("Please run pipeline first.")
 
     def fit_statistical_clear_sky_model(
         self,