forked from dingo-actual/dropgrad
Commit 0cc4599 (1 parent: f568b27), showing 2 changed files with 199 additions and 0 deletions.
@@ -0,0 +1,98 @@
""" | ||
Visualizing the Behavior of DropGrad with Various Optimizers on Optimization Benchmarks | ||
""" | ||
|
||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
from scipy.optimize import rosen | ||
from dropgrad import DropGrad | ||
import torch | ||
from torch.optim import SGD, Adam, AdamW, Adagrad, Adadelta | ||
|
||
def rastrigin(x): | ||
A = 10 | ||
n = len(x) | ||
return A * n + sum(x**2 - A * np.cos(2 * np.pi * x)) | ||
|
||
def ackley(x): | ||
a = 20 | ||
b = 0.2 | ||
c = 2 * np.pi | ||
d = len(x) | ||
sum_sq_term = -a * np.exp(-b * np.sqrt(np.sum(x**2) / d)) | ||
cos_term = -np.exp(np.sum(np.cos(c * x)) / d) | ||
return a + np.exp(1) + sum_sq_term + cos_term | ||
|
||
def apply_dropgrad(optimizer, drop_rate): | ||
""" | ||
Apply DropGrad to the optimizer if drop_rate is greater than 0. | ||
""" | ||
if drop_rate > 0: | ||
return DropGrad(optimizer, drop_rate=drop_rate) | ||
return optimizer | ||
|
||
def optimize(optimizer, x, benchmark_func, num_iterations): | ||
""" | ||
Run the optimizer on the benchmark function for a given number of iterations. | ||
""" | ||
trajectory = [x.detach().numpy().copy()] | ||
|
||
for _ in range(num_iterations): | ||
optimizer.zero_grad() | ||
y = benchmark_func(x.detach().numpy()) | ||
y_tensor = torch.tensor(y, requires_grad=True) | ||
y_tensor.backward() | ||
optimizer.step() | ||
trajectory.append(x.detach().numpy().copy()) | ||
|
||
return trajectory | ||
|
||
def visualize_benchmark(benchmark_func, optimizers, num_iterations, drop_rates): | ||
""" | ||
Visualize the optimization trajectories for different optimizers and drop rates. | ||
""" | ||
num_optimizers = len(optimizers) | ||
num_drop_rates = len(drop_rates) | ||
|
||
fig, axs = plt.subplots(num_optimizers, num_drop_rates, figsize=(12, 8), sharex=True, sharey=True) | ||
|
||
if num_optimizers == 1 and num_drop_rates == 1: | ||
axs = [[axs]] | ||
elif num_optimizers == 1: | ||
axs = [axs] | ||
elif num_drop_rates == 1: | ||
axs = [[ax] for ax in axs] | ||
|
||
for i, (optimizer_name, base_optimizer) in enumerate(optimizers.items()): | ||
for j, drop_rate in enumerate(drop_rates): | ||
x = torch.randn(2, requires_grad=True) | ||
optimizer = apply_dropgrad(base_optimizer, drop_rate) | ||
trajectory = optimize(optimizer, x, benchmark_func, num_iterations) | ||
|
||
x_values = [point[0] for point in trajectory] | ||
y_values = [point[1] for point in trajectory] | ||
|
||
axs[i][j].plot(x_values, y_values, marker='o', markersize=2, linestyle='-', linewidth=0.5) | ||
axs[i][j].set_title(f"{optimizer_name} (Drop Rate: {drop_rate})") | ||
|
||
fig.suptitle(f"Optimization Trajectories on {benchmark_func.__name__}", fontsize=16) | ||
plt.tight_layout() | ||
plt.show() | ||
|
||
def main(): | ||
num_iterations = 1000 | ||
optimizers = { | ||
"SGD": SGD([torch.randn(2, requires_grad=True)], lr=0.01), | ||
"Adam": Adam([torch.randn(2, requires_grad=True)], lr=0.01), | ||
"AdamW": AdamW([torch.randn(2, requires_grad=True)], lr=0.01), | ||
"Adagrad": Adagrad([torch.randn(2, requires_grad=True)], lr=0.01), | ||
"Adadelta": Adadelta([torch.randn(2, requires_grad=True)], lr=0.01), | ||
} | ||
drop_rates = [0.0, 0.1, 0.2] | ||
benchmarks = [rosen, rastrigin, ackley] | ||
|
||
for benchmark_func in benchmarks: | ||
visualize_benchmark(benchmark_func, optimizers, num_iterations, drop_rates) | ||
|
||
if __name__ == "__main__": | ||
main() |
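For reference, the wrapping pattern this script relies on can be exercised in isolation. The following is a minimal sketch using only the `DropGrad(optimizer, drop_rate=...)` interface seen above, with a toy quadratic objective standing in for the benchmarks; the objective and iteration count are illustrative, not part of the original script.

import torch
from torch.optim import SGD

from dropgrad import DropGrad

# A single 2-D tensor optimized directly, as in the script above.
x = torch.randn(2, requires_grad=True)

# Wrap a standard optimizer; DropGrad is assumed to randomly drop a fraction
# (drop_rate) of gradient entries at each step before the underlying update.
opt = DropGrad(SGD([x], lr=0.01), drop_rate=0.1)

for _ in range(100):
    opt.zero_grad()
    loss = torch.sum((x - 1.0) ** 2)  # toy quadratic objective (hypothetical)
    loss.backward()
    opt.step()

print(x.detach())  # drifts toward [1.0, 1.0] despite dropped gradient entries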
@@ -0,0 +1,101 @@
""" | ||
Mathematical Analysis of DropGrad's Effect on Optimizers | ||
""" | ||
|
||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
|
||
def sgd_update(params, grads, lr): | ||
""" | ||
Stochastic Gradient Descent (SGD) update rule. | ||
""" | ||
return params - lr * grads | ||
|
||
def adam_update(params, grads, m, v, t, lr, beta1, beta2, eps): | ||
""" | ||
Adam update rule. | ||
""" | ||
m = beta1 * m + (1 - beta1) * grads | ||
v = beta2 * v + (1 - beta2) * (grads ** 2) | ||
m_hat = m / (1 - beta1 ** t) | ||
v_hat = v / (1 - beta2 ** t) | ||
return params - lr * m_hat / (np.sqrt(v_hat) + eps), m, v | ||
|
||
def lion_update(params, grads, m, t, lr, beta1, beta2): | ||
""" | ||
Lion update rule. | ||
""" | ||
m = beta1 * m + (1 - beta1) * grads | ||
m_hat = m / (1 - beta1 ** t) | ||
update = lr * m_hat / (np.abs(m_hat) + beta2) | ||
return params - update, m | ||
|
||
def dropgrad_update(params, grads, drop_rate): | ||
""" | ||
DropGrad modification of the gradient update. | ||
""" | ||
mask = np.random.binomial(1, 1 - drop_rate, size=grads.shape) | ||
return params - (grads * mask) / (1 - drop_rate) | ||
|
||
def analyze_optimizer(optimizer, num_iterations, drop_rate=0.0): | ||
""" | ||
Analyze the effect of DropGrad on an optimizer. | ||
""" | ||
params = np.zeros(10) | ||
m = np.zeros_like(params) | ||
v = np.zeros_like(params) | ||
lr = 0.01 | ||
beta1 = 0.9 | ||
beta2 = 0.999 | ||
eps = 1e-8 | ||
|
||
trajectories = [] | ||
for _ in range(num_iterations): | ||
grads = np.random.randn(*params.shape) | ||
if optimizer == "sgd": | ||
params = sgd_update(params, grads, lr) | ||
elif optimizer == "adam": | ||
params, m, v = adam_update(params, grads, m, v, _ + 1, lr, beta1, beta2, eps) | ||
elif optimizer == "lion": | ||
params, m = lion_update(params, grads, m, _ + 1, lr, beta1, beta2) | ||
|
||
if drop_rate > 0: | ||
params = dropgrad_update(params, grads, drop_rate) | ||
|
||
trajectories.append(params.copy()) | ||
|
||
return np.array(trajectories) | ||
|
||
def visualize_trajectories(optimizer, num_iterations, drop_rates): | ||
""" | ||
Visualize the optimization trajectories with different drop rates. | ||
""" | ||
trajectories = [] | ||
for drop_rate in drop_rates: | ||
trajectories.append(analyze_optimizer(optimizer, num_iterations, drop_rate)) | ||
|
||
plt.figure(figsize=(8, 6)) | ||
for i, drop_rate in enumerate(drop_rates): | ||
plt.plot(trajectories[i][:, 0], trajectories[i][:, 1], label=f"Drop Rate: {drop_rate}") | ||
plt.xlabel("Parameter 1") | ||
plt.ylabel("Parameter 2") | ||
plt.title(f"Optimization Trajectories ({optimizer.upper()})") | ||
plt.legend() | ||
plt.tight_layout() | ||
plt.show() | ||
|
||
def main(): | ||
num_iterations = 1000 | ||
drop_rates = [0.0, 0.1, 0.2, 0.3] | ||
|
||
# Analyze SGD optimizer | ||
visualize_trajectories("sgd", num_iterations, drop_rates) | ||
|
||
# Analyze Adam optimizer | ||
visualize_trajectories("adam", num_iterations, drop_rates) | ||
|
||
# Analyze Lion optimizer | ||
visualize_trajectories("lion", num_iterations, drop_rates) | ||
|
||
if __name__ == "__main__": | ||
main() |
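As a closing note on the rescaling used in dropgrad_update: assuming each mask entry is drawn i.i.d. from a Bernoulli(1 - p) distribution, independently of the gradient, the masked-and-rescaled gradient is unbiased, which is why the 1 / (1 - drop_rate) factor appears:

\tilde{g}_i = \frac{m_i \, g_i}{1 - p},
\qquad
\mathbb{E}\!\left[\tilde{g}_i\right]
  = \frac{\mathbb{E}[m_i] \, g_i}{1 - p}
  = \frac{(1 - p) \, g_i}{1 - p}
  = g_i .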