From 0cc459984afe22f55e261a1944968c7df2bb42fc Mon Sep 17 00:00:00 2001
From: muditbhargava66
Date: Thu, 25 Apr 2024 04:10:25 -0400
Subject: [PATCH] =?UTF-8?q?Version=203=20features=20=F0=9F=9B=A0=EF=B8=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../benchmark_visualizations.py | 98 +++++++++++++++++
 .../vit_experiments/mathematical_analysis.py | 101 ++++++++++++++++++
 2 files changed, 199 insertions(+)
 create mode 100644 examples/vit_experiments/benchmark_visualizations.py
 create mode 100644 examples/vit_experiments/mathematical_analysis.py

diff --git a/examples/vit_experiments/benchmark_visualizations.py b/examples/vit_experiments/benchmark_visualizations.py
new file mode 100644
index 0000000..9c7e13a
--- /dev/null
+++ b/examples/vit_experiments/benchmark_visualizations.py
@@ -0,0 +1,98 @@
+"""
+Visualizing the Behavior of DropGrad with Various Optimizers on Optimization Benchmarks
+"""
+
+import numpy as np
+import matplotlib.pyplot as plt
+import torch
+from torch.optim import SGD, Adam, AdamW, Adagrad, Adadelta
+from dropgrad import DropGrad
+
+def rosenbrock(x):
+    """Rosenbrock function written with torch ops so autograd can differentiate it."""
+    return torch.sum(100.0 * (x[1:] - x[:-1] ** 2) ** 2 + (1.0 - x[:-1]) ** 2)
+
+def rastrigin(x):
+    """Rastrigin function written with torch ops."""
+    A = 10.0
+    n = x.numel()
+    return A * n + torch.sum(x ** 2 - A * torch.cos(2 * np.pi * x))
+
+def ackley(x):
+    """Ackley function written with torch ops."""
+    a = 20.0
+    b = 0.2
+    c = 2 * np.pi
+    d = x.numel()
+    sum_sq_term = -a * torch.exp(-b * torch.sqrt(torch.sum(x ** 2) / d))
+    cos_term = -torch.exp(torch.sum(torch.cos(c * x)) / d)
+    return a + np.e + sum_sq_term + cos_term
+
+def apply_dropgrad(optimizer, drop_rate):
+    """
+    Wrap the optimizer in DropGrad if drop_rate is greater than 0.
+    """
+    if drop_rate > 0:
+        return DropGrad(optimizer, drop_rate=drop_rate)
+    return optimizer
+
+def optimize(optimizer, x, benchmark_func, num_iterations):
+    """
+    Run the optimizer on the benchmark function for a given number of iterations.
+    """
+    trajectory = [x.detach().numpy().copy()]
+
+    for _ in range(num_iterations):
+        optimizer.zero_grad()
+        # Evaluate the benchmark directly on the parameter tensor so that
+        # backward() populates x.grad and optimizer.step() actually moves x.
+        y = benchmark_func(x)
+        y.backward()
+        optimizer.step()
+        trajectory.append(x.detach().numpy().copy())
+
+    return trajectory
+
+def visualize_benchmark(benchmark_func, optimizer_factories, num_iterations, drop_rates):
+    """
+    Visualize the optimization trajectories for different optimizers and drop rates.
+ """ + num_optimizers = len(optimizers) + num_drop_rates = len(drop_rates) + + fig, axs = plt.subplots(num_optimizers, num_drop_rates, figsize=(12, 8), sharex=True, sharey=True) + + if num_optimizers == 1 and num_drop_rates == 1: + axs = [[axs]] + elif num_optimizers == 1: + axs = [axs] + elif num_drop_rates == 1: + axs = [[ax] for ax in axs] + + for i, (optimizer_name, base_optimizer) in enumerate(optimizers.items()): + for j, drop_rate in enumerate(drop_rates): + x = torch.randn(2, requires_grad=True) + optimizer = apply_dropgrad(base_optimizer, drop_rate) + trajectory = optimize(optimizer, x, benchmark_func, num_iterations) + + x_values = [point[0] for point in trajectory] + y_values = [point[1] for point in trajectory] + + axs[i][j].plot(x_values, y_values, marker='o', markersize=2, linestyle='-', linewidth=0.5) + axs[i][j].set_title(f"{optimizer_name} (Drop Rate: {drop_rate})") + + fig.suptitle(f"Optimization Trajectories on {benchmark_func.__name__}", fontsize=16) + plt.tight_layout() + plt.show() + +def main(): + num_iterations = 1000 + optimizers = { + "SGD": SGD([torch.randn(2, requires_grad=True)], lr=0.01), + "Adam": Adam([torch.randn(2, requires_grad=True)], lr=0.01), + "AdamW": AdamW([torch.randn(2, requires_grad=True)], lr=0.01), + "Adagrad": Adagrad([torch.randn(2, requires_grad=True)], lr=0.01), + "Adadelta": Adadelta([torch.randn(2, requires_grad=True)], lr=0.01), + } + drop_rates = [0.0, 0.1, 0.2] + benchmarks = [rosen, rastrigin, ackley] + + for benchmark_func in benchmarks: + visualize_benchmark(benchmark_func, optimizers, num_iterations, drop_rates) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/examples/vit_experiments/mathematical_analysis.py b/examples/vit_experiments/mathematical_analysis.py new file mode 100644 index 0000000..5af28e1 --- /dev/null +++ b/examples/vit_experiments/mathematical_analysis.py @@ -0,0 +1,101 @@ +""" +Mathematical Analysis of DropGrad's Effect on Optimizers +""" + +import numpy as np +import matplotlib.pyplot as plt + +def sgd_update(params, grads, lr): + """ + Stochastic Gradient Descent (SGD) update rule. + """ + return params - lr * grads + +def adam_update(params, grads, m, v, t, lr, beta1, beta2, eps): + """ + Adam update rule. + """ + m = beta1 * m + (1 - beta1) * grads + v = beta2 * v + (1 - beta2) * (grads ** 2) + m_hat = m / (1 - beta1 ** t) + v_hat = v / (1 - beta2 ** t) + return params - lr * m_hat / (np.sqrt(v_hat) + eps), m, v + +def lion_update(params, grads, m, t, lr, beta1, beta2): + """ + Lion update rule. + """ + m = beta1 * m + (1 - beta1) * grads + m_hat = m / (1 - beta1 ** t) + update = lr * m_hat / (np.abs(m_hat) + beta2) + return params - update, m + +def dropgrad_update(params, grads, drop_rate): + """ + DropGrad modification of the gradient update. + """ + mask = np.random.binomial(1, 1 - drop_rate, size=grads.shape) + return params - (grads * mask) / (1 - drop_rate) + +def analyze_optimizer(optimizer, num_iterations, drop_rate=0.0): + """ + Analyze the effect of DropGrad on an optimizer. 
+ """ + params = np.zeros(10) + m = np.zeros_like(params) + v = np.zeros_like(params) + lr = 0.01 + beta1 = 0.9 + beta2 = 0.999 + eps = 1e-8 + + trajectories = [] + for _ in range(num_iterations): + grads = np.random.randn(*params.shape) + if optimizer == "sgd": + params = sgd_update(params, grads, lr) + elif optimizer == "adam": + params, m, v = adam_update(params, grads, m, v, _ + 1, lr, beta1, beta2, eps) + elif optimizer == "lion": + params, m = lion_update(params, grads, m, _ + 1, lr, beta1, beta2) + + if drop_rate > 0: + params = dropgrad_update(params, grads, drop_rate) + + trajectories.append(params.copy()) + + return np.array(trajectories) + +def visualize_trajectories(optimizer, num_iterations, drop_rates): + """ + Visualize the optimization trajectories with different drop rates. + """ + trajectories = [] + for drop_rate in drop_rates: + trajectories.append(analyze_optimizer(optimizer, num_iterations, drop_rate)) + + plt.figure(figsize=(8, 6)) + for i, drop_rate in enumerate(drop_rates): + plt.plot(trajectories[i][:, 0], trajectories[i][:, 1], label=f"Drop Rate: {drop_rate}") + plt.xlabel("Parameter 1") + plt.ylabel("Parameter 2") + plt.title(f"Optimization Trajectories ({optimizer.upper()})") + plt.legend() + plt.tight_layout() + plt.show() + +def main(): + num_iterations = 1000 + drop_rates = [0.0, 0.1, 0.2, 0.3] + + # Analyze SGD optimizer + visualize_trajectories("sgd", num_iterations, drop_rates) + + # Analyze Adam optimizer + visualize_trajectories("adam", num_iterations, drop_rates) + + # Analyze Lion optimizer + visualize_trajectories("lion", num_iterations, drop_rates) + +if __name__ == "__main__": + main() \ No newline at end of file