From 0cc459984afe22f55e261a1944968c7df2bb42fc Mon Sep 17 00:00:00 2001
From: muditbhargava66
Date: Thu, 25 Apr 2024 04:10:25 -0400
Subject: [PATCH] =?UTF-8?q?Version=203=20features=20=F0=9F=9B=A0=EF=B8=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../benchmark_visualizations.py | 98 +++++++++++++++++
 .../vit_experiments/mathematical_analysis.py | 101 ++++++++++++++++++
 2 files changed, 199 insertions(+)
 create mode 100644 examples/vit_experiments/benchmark_visualizations.py
 create mode 100644 examples/vit_experiments/mathematical_analysis.py

diff --git a/examples/vit_experiments/benchmark_visualizations.py b/examples/vit_experiments/benchmark_visualizations.py
new file mode 100644
index 0000000..9c7e13a
--- /dev/null
+++ b/examples/vit_experiments/benchmark_visualizations.py
@@ -0,0 +1,98 @@
+"""
+Visualizing the Behavior of DropGrad with Various Optimizers on Optimization Benchmarks
+"""
+
+import numpy as np
+import matplotlib.pyplot as plt
+import torch
+from torch.optim import SGD, Adam, AdamW, Adagrad, Adadelta
+from dropgrad import DropGrad
+
+def rosenbrock(x):
+    """Rosenbrock function written with torch ops so autograd can differentiate it."""
+    return torch.sum(100.0 * (x[1:] - x[:-1] ** 2) ** 2 + (1.0 - x[:-1]) ** 2)
+
+def rastrigin(x):
+    """Rastrigin function written with torch ops."""
+    A = 10.0
+    n = x.numel()
+    return A * n + torch.sum(x ** 2 - A * torch.cos(2 * np.pi * x))
+
+def ackley(x):
+    """Ackley function written with torch ops."""
+    a = 20.0
+    b = 0.2
+    c = 2 * np.pi
+    d = x.numel()
+    sum_sq_term = -a * torch.exp(-b * torch.sqrt(torch.sum(x ** 2) / d))
+    cos_term = -torch.exp(torch.sum(torch.cos(c * x)) / d)
+    return a + np.e + sum_sq_term + cos_term
+
+def apply_dropgrad(optimizer, drop_rate):
+    """
+    Wrap the optimizer in DropGrad if drop_rate is greater than 0.
+    """
+    if drop_rate > 0:
+        return DropGrad(optimizer, drop_rate=drop_rate)
+    return optimizer
+
+def optimize(optimizer, x, benchmark_func, num_iterations):
+    """
+    Run the optimizer on the benchmark function for a given number of iterations.
+    """
+    trajectory = [x.detach().numpy().copy()]
+
+    for _ in range(num_iterations):
+        optimizer.zero_grad()
+        # Evaluate the benchmark directly on the parameter tensor so that
+        # backward() populates x.grad and optimizer.step() actually moves x.
+        y = benchmark_func(x)
+        y.backward()
+        optimizer.step()
+        trajectory.append(x.detach().numpy().copy())
+
+    return trajectory
+
+def visualize_benchmark(benchmark_func, optimizer_factories, num_iterations, drop_rates):
+    """
+    Visualize the optimization trajectories for different optimizers and drop rates.
+ """ + num_optimizers = len(optimizers) + num_drop_rates = len(drop_rates) + + fig, axs = plt.subplots(num_optimizers, num_drop_rates, figsize=(12, 8), sharex=True, sharey=True) + + if num_optimizers == 1 and num_drop_rates == 1: + axs = [[axs]] + elif num_optimizers == 1: + axs = [axs] + elif num_drop_rates == 1: + axs = [[ax] for ax in axs] + + for i, (optimizer_name, base_optimizer) in enumerate(optimizers.items()): + for j, drop_rate in enumerate(drop_rates): + x = torch.randn(2, requires_grad=True) + optimizer = apply_dropgrad(base_optimizer, drop_rate) + trajectory = optimize(optimizer, x, benchmark_func, num_iterations) + + x_values = [point[0] for point in trajectory] + y_values = [point[1] for point in trajectory] + + axs[i][j].plot(x_values, y_values, marker='o', markersize=2, linestyle='-', linewidth=0.5) + axs[i][j].set_title(f"{optimizer_name} (Drop Rate: {drop_rate})") + + fig.suptitle(f"Optimization Trajectories on {benchmark_func.__name__}", fontsize=16) + plt.tight_layout() + plt.show() + +def main(): + num_iterations = 1000 + optimizers = { + "SGD": SGD([torch.randn(2, requires_grad=True)], lr=0.01), + "Adam": Adam([torch.randn(2, requires_grad=True)], lr=0.01), + "AdamW": AdamW([torch.randn(2, requires_grad=True)], lr=0.01), + "Adagrad": Adagrad([torch.randn(2, requires_grad=True)], lr=0.01), + "Adadelta": Adadelta([torch.randn(2, requires_grad=True)], lr=0.01), + } + drop_rates = [0.0, 0.1, 0.2] + benchmarks = [rosen, rastrigin, ackley] + + for benchmark_func in benchmarks: + visualize_benchmark(benchmark_func, optimizers, num_iterations, drop_rates) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/examples/vit_experiments/mathematical_analysis.py b/examples/vit_experiments/mathematical_analysis.py new file mode 100644 index 0000000..5af28e1 --- /dev/null +++ b/examples/vit_experiments/mathematical_analysis.py @@ -0,0 +1,101 @@ +""" +Mathematical Analysis of DropGrad's Effect on Optimizers +""" + +import numpy as np +import matplotlib.pyplot as plt + +def sgd_update(params, grads, lr): + """ + Stochastic Gradient Descent (SGD) update rule. + """ + return params - lr * grads + +def adam_update(params, grads, m, v, t, lr, beta1, beta2, eps): + """ + Adam update rule. + """ + m = beta1 * m + (1 - beta1) * grads + v = beta2 * v + (1 - beta2) * (grads ** 2) + m_hat = m / (1 - beta1 ** t) + v_hat = v / (1 - beta2 ** t) + return params - lr * m_hat / (np.sqrt(v_hat) + eps), m, v + +def lion_update(params, grads, m, t, lr, beta1, beta2): + """ + Lion update rule. + """ + m = beta1 * m + (1 - beta1) * grads + m_hat = m / (1 - beta1 ** t) + update = lr * m_hat / (np.abs(m_hat) + beta2) + return params - update, m + +def dropgrad_update(params, grads, drop_rate): + """ + DropGrad modification of the gradient update. + """ + mask = np.random.binomial(1, 1 - drop_rate, size=grads.shape) + return params - (grads * mask) / (1 - drop_rate) + +def analyze_optimizer(optimizer, num_iterations, drop_rate=0.0): + """ + Analyze the effect of DropGrad on an optimizer. 
+ """ + params = np.zeros(10) + m = np.zeros_like(params) + v = np.zeros_like(params) + lr = 0.01 + beta1 = 0.9 + beta2 = 0.999 + eps = 1e-8 + + trajectories = [] + for _ in range(num_iterations): + grads = np.random.randn(*params.shape) + if optimizer == "sgd": + params = sgd_update(params, grads, lr) + elif optimizer == "adam": + params, m, v = adam_update(params, grads, m, v, _ + 1, lr, beta1, beta2, eps) + elif optimizer == "lion": + params, m = lion_update(params, grads, m, _ + 1, lr, beta1, beta2) + + if drop_rate > 0: + params = dropgrad_update(params, grads, drop_rate) + + trajectories.append(params.copy()) + + return np.array(trajectories) + +def visualize_trajectories(optimizer, num_iterations, drop_rates): + """ + Visualize the optimization trajectories with different drop rates. + """ + trajectories = [] + for drop_rate in drop_rates: + trajectories.append(analyze_optimizer(optimizer, num_iterations, drop_rate)) + + plt.figure(figsize=(8, 6)) + for i, drop_rate in enumerate(drop_rates): + plt.plot(trajectories[i][:, 0], trajectories[i][:, 1], label=f"Drop Rate: {drop_rate}") + plt.xlabel("Parameter 1") + plt.ylabel("Parameter 2") + plt.title(f"Optimization Trajectories ({optimizer.upper()})") + plt.legend() + plt.tight_layout() + plt.show() + +def main(): + num_iterations = 1000 + drop_rates = [0.0, 0.1, 0.2, 0.3] + + # Analyze SGD optimizer + visualize_trajectories("sgd", num_iterations, drop_rates) + + # Analyze Adam optimizer + visualize_trajectories("adam", num_iterations, drop_rates) + + # Analyze Lion optimizer + visualize_trajectories("lion", num_iterations, drop_rates) + +if __name__ == "__main__": + main() \ No newline at end of file