From cb446a62a38d487c0151762c8cd1ee52d6b15cd6 Mon Sep 17 00:00:00 2001 From: Kappi Patterson Date: Mon, 4 Mar 2024 21:47:19 +0100 Subject: [PATCH 1/7] Ran formatter --- train.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/train.py b/train.py index 2cfae71..b24b50d 100644 --- a/train.py +++ b/train.py @@ -11,10 +11,9 @@ class Net(nn.Module): def __init__(self): - super(Net, self).__init__() - self.conv1 = nn.Conv2d(1, 32, 3, 1) - self.conv2 = nn.Conv2d( 32, 64, 3, 1) + self.conv1 = nn.Conv2d(1, 32, 3, 1) + self.conv2 = nn.Conv2d(32, 64, 3, 1) self.dropout1 = nn.Dropout(0.25) self.dropout2 = nn.Dropout(0.5) self.fc1 = nn.Linear(9216, 128) @@ -36,14 +35,14 @@ def forward(self, x): return output -def train(args, model, device, train_loader, optimizer, epoch): +def train(args, model, device, train_loader, optimizer, epoch): model.train() for batch_idx, (data, target) in enumerate(train_loader): - data, target = data.to(device), target.to(device) + data, target = data.to(device), target.to(device) optimizer.zero_grad() output = model(data) - loss = F.nll_loss( output, target) + loss = F.nll_loss(output, target) loss.backward() optimizer.step() if batch_idx % args.log_interval == 0: @@ -52,7 +51,7 @@ def train(args, model, device, train_loader, optimizer, epoch): epoch, batch_idx * len(data), len(train_loader.dataset), - 100.0 * batch_idx / len(train_loader), + 100.0 * batch_idx / len(train_loader), loss.item(), ) ) @@ -70,11 +69,11 @@ def test(model, device, test_loader, epoch): data, target = data.to(device), target.to(device) output = model(data) - test_loss += F.nll_loss( output, target, reduction="sum").item() # sum up batch loss + test_loss += F.nll_loss(output, target, reduction="sum").item() # sum up batch loss pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability - correct += pred.eq(target.view_as(pred) ).sum().item() + correct += pred.eq(target.view_as(pred)).sum().item() - test_loss /= len(test_loader.dataset ) + test_loss /= len(test_loader.dataset) print( "\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format( @@ -132,7 +131,7 @@ def main(): model = Net().to(device) optimizer = optim.Adam(model.parameters(), lr=args.lr) - scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma) + scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma) for epoch in range(args.epochs): train(args, model, device, train_loader, optimizer, epoch) test(model, device, test_loader, epoch) From 8e6203895699ff2d3fac3475771613f892dfa7cb Mon Sep 17 00:00:00 2001 From: Kappi Patterson Date: Mon, 4 Mar 2024 21:59:50 +0100 Subject: [PATCH 2/7] Creating an installable package and installing it --- .gitignore | 1 + plr_exercise.egg-info/PKG-INFO | 10 ++++++++++ plr_exercise.egg-info/SOURCES.txt | 9 +++++++++ plr_exercise.egg-info/dependency_links.txt | 1 + plr_exercise.egg-info/requires.txt | 2 ++ plr_exercise.egg-info/top_level.txt | 1 + timing.py => scripts/timing.py | 0 train.py => scripts/train.py | 0 8 files changed, 24 insertions(+) create mode 100644 .gitignore create mode 100644 plr_exercise.egg-info/PKG-INFO create mode 100644 plr_exercise.egg-info/SOURCES.txt create mode 100644 plr_exercise.egg-info/dependency_links.txt create mode 100644 plr_exercise.egg-info/requires.txt create mode 100644 plr_exercise.egg-info/top_level.txt rename timing.py => scripts/timing.py (100%) rename train.py => scripts/train.py (100%) diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..68bcbc9 --- /dev/null 
+++ b/.gitignore @@ -0,0 +1 @@ +results/ \ No newline at end of file diff --git a/plr_exercise.egg-info/PKG-INFO b/plr_exercise.egg-info/PKG-INFO new file mode 100644 index 0000000..0c29d2b --- /dev/null +++ b/plr_exercise.egg-info/PKG-INFO @@ -0,0 +1,10 @@ +Metadata-Version: 2.1 +Name: plr_exercise +Version: 1.0.0 +Summary: A small example package +Author: Jonas Frey +Author-email: jonfrey@ethz.ch +Requires-Python: >=3.7 +License-File: LICENSE +Requires-Dist: numpy +Requires-Dist: torch>=1.21 diff --git a/plr_exercise.egg-info/SOURCES.txt b/plr_exercise.egg-info/SOURCES.txt new file mode 100644 index 0000000..6531f8c --- /dev/null +++ b/plr_exercise.egg-info/SOURCES.txt @@ -0,0 +1,9 @@ +LICENSE +README.md +setup.py +plr_exercise/__init__.py +plr_exercise.egg-info/PKG-INFO +plr_exercise.egg-info/SOURCES.txt +plr_exercise.egg-info/dependency_links.txt +plr_exercise.egg-info/requires.txt +plr_exercise.egg-info/top_level.txt \ No newline at end of file diff --git a/plr_exercise.egg-info/dependency_links.txt b/plr_exercise.egg-info/dependency_links.txt new file mode 100644 index 0000000..2b90dfa --- /dev/null +++ b/plr_exercise.egg-info/dependency_links.txt @@ -0,0 +1 @@ +https://download.pytorch.org/whl/torch-2.1.0+cu121-cp38-cp38-linux_x86_64.whl diff --git a/plr_exercise.egg-info/requires.txt b/plr_exercise.egg-info/requires.txt new file mode 100644 index 0000000..f50dca9 --- /dev/null +++ b/plr_exercise.egg-info/requires.txt @@ -0,0 +1,2 @@ +numpy +torch>=1.21 diff --git a/plr_exercise.egg-info/top_level.txt b/plr_exercise.egg-info/top_level.txt new file mode 100644 index 0000000..1279457 --- /dev/null +++ b/plr_exercise.egg-info/top_level.txt @@ -0,0 +1 @@ +plr_exercise diff --git a/timing.py b/scripts/timing.py similarity index 100% rename from timing.py rename to scripts/timing.py diff --git a/train.py b/scripts/train.py similarity index 100% rename from train.py rename to scripts/train.py From a7002dbea976d00e8dc55e6e859365d1d197f124 Mon Sep 17 00:00:00 2001 From: Kappi Patterson Date: Mon, 4 Mar 2024 22:10:40 +0100 Subject: [PATCH 3/7] removing egg info from tracking --- plr_exercise.egg-info/PKG-INFO | 10 ---------- plr_exercise.egg-info/SOURCES.txt | 9 --------- plr_exercise.egg-info/dependency_links.txt | 1 - plr_exercise.egg-info/requires.txt | 2 -- plr_exercise.egg-info/top_level.txt | 1 - 5 files changed, 23 deletions(-) delete mode 100644 plr_exercise.egg-info/PKG-INFO delete mode 100644 plr_exercise.egg-info/SOURCES.txt delete mode 100644 plr_exercise.egg-info/dependency_links.txt delete mode 100644 plr_exercise.egg-info/requires.txt delete mode 100644 plr_exercise.egg-info/top_level.txt diff --git a/plr_exercise.egg-info/PKG-INFO b/plr_exercise.egg-info/PKG-INFO deleted file mode 100644 index 0c29d2b..0000000 --- a/plr_exercise.egg-info/PKG-INFO +++ /dev/null @@ -1,10 +0,0 @@ -Metadata-Version: 2.1 -Name: plr_exercise -Version: 1.0.0 -Summary: A small example package -Author: Jonas Frey -Author-email: jonfrey@ethz.ch -Requires-Python: >=3.7 -License-File: LICENSE -Requires-Dist: numpy -Requires-Dist: torch>=1.21 diff --git a/plr_exercise.egg-info/SOURCES.txt b/plr_exercise.egg-info/SOURCES.txt deleted file mode 100644 index 6531f8c..0000000 --- a/plr_exercise.egg-info/SOURCES.txt +++ /dev/null @@ -1,9 +0,0 @@ -LICENSE -README.md -setup.py -plr_exercise/__init__.py -plr_exercise.egg-info/PKG-INFO -plr_exercise.egg-info/SOURCES.txt -plr_exercise.egg-info/dependency_links.txt -plr_exercise.egg-info/requires.txt -plr_exercise.egg-info/top_level.txt \ No newline at end of file 
diff --git a/plr_exercise.egg-info/dependency_links.txt b/plr_exercise.egg-info/dependency_links.txt deleted file mode 100644 index 2b90dfa..0000000 --- a/plr_exercise.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ -https://download.pytorch.org/whl/torch-2.1.0+cu121-cp38-cp38-linux_x86_64.whl diff --git a/plr_exercise.egg-info/requires.txt b/plr_exercise.egg-info/requires.txt deleted file mode 100644 index f50dca9..0000000 --- a/plr_exercise.egg-info/requires.txt +++ /dev/null @@ -1,2 +0,0 @@ -numpy -torch>=1.21 diff --git a/plr_exercise.egg-info/top_level.txt b/plr_exercise.egg-info/top_level.txt deleted file mode 100644 index 1279457..0000000 --- a/plr_exercise.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -plr_exercise From c779d9ce44a9a6ef092fe20049087e6818ee5499 Mon Sep 17 00:00:00 2001 From: Kappi Patterson Date: Mon, 4 Mar 2024 22:12:57 +0100 Subject: [PATCH 4/7] Updating .gitignore --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 68bcbc9..a1709a9 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ -results/ \ No newline at end of file +results/* +*.pyc +plr_exercise.egg-info/* \ No newline at end of file From 10313fc8e2967bf9f8d5e90b02ae21296a3567ea Mon Sep 17 00:00:00 2001 From: Kappi Patterson Date: Mon, 4 Mar 2024 22:24:38 +0100 Subject: [PATCH 5/7] adding wandb --- plr_exercise/models/cnn.py | 31 ++++++++++++++++++++++++ scripts/train.py | 48 ++++++++++++++++---------------------- 2 files changed, 51 insertions(+), 28 deletions(-) create mode 100644 plr_exercise/models/cnn.py diff --git a/plr_exercise/models/cnn.py b/plr_exercise/models/cnn.py new file mode 100644 index 0000000..c48bf57 --- /dev/null +++ b/plr_exercise/models/cnn.py @@ -0,0 +1,31 @@ +from __future__ import print_function +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Net(nn.Module): + def __init__(self): + + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 32, 3, 1) + self.conv2 = nn.Conv2d(32, 64, 3, 1) + self.dropout1 = nn.Dropout(0.25) + self.dropout2 = nn.Dropout(0.5) + self.fc1 = nn.Linear(9216, 128) + self.fc2 = nn.Linear(128, 10) + + def forward(self, x): + x = self.conv1(x) + x = F.relu(x) + x = self.conv2(x) + x = F.relu(x) + x = F.max_pool2d(x, 2) + x = self.dropout1(x) + x = torch.flatten(x, 1) + x = self.fc1(x) + x = F.relu(x) + x = self.dropout2(x) + x = self.fc2(x) + output = F.log_softmax(x, dim=1) + return output diff --git a/scripts/train.py b/scripts/train.py index b24b50d..4167c01 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -1,38 +1,14 @@ from __future__ import print_function import argparse import torch -import torch.nn as nn import torch.nn.functional as F import torch.optim as optim from torchvision import datasets, transforms from torch.optim.lr_scheduler import StepLR - - -class Net(nn.Module): - def __init__(self): - - super(Net, self).__init__() - self.conv1 = nn.Conv2d(1, 32, 3, 1) - self.conv2 = nn.Conv2d(32, 64, 3, 1) - self.dropout1 = nn.Dropout(0.25) - self.dropout2 = nn.Dropout(0.5) - self.fc1 = nn.Linear(9216, 128) - self.fc2 = nn.Linear(128, 10) - - def forward(self, x): - x = self.conv1(x) - x = F.relu(x) - x = self.conv2(x) - x = F.relu(x) - x = F.max_pool2d(x, 2) - x = self.dropout1(x) - x = torch.flatten(x, 1) - x = self.fc1(x) - x = F.relu(x) - x = self.dropout2(x) - x = self.fc2(x) - output = F.log_softmax(x, dim=1) - return output +from plr_exercise.models.cnn import Net +from plr_exercise import PLR_ROOT_DIR +import wandb +import os 
def train(args, model, device, train_loader, optimizer, epoch): @@ -55,6 +31,7 @@ def train(args, model, device, train_loader, optimizer, epoch): loss.item(), ) ) + wandb.log({"epoch": epoch, "train_loss": loss.item()}) if args.dry_run: break @@ -80,6 +57,7 @@ def test(model, device, test_loader, epoch): test_loss, correct, len(test_loader.dataset), 100.0 * correct / len(test_loader.dataset) ) ) + wandb.log({"test_loss": test_loss, "epoch": epoch}) def main(): @@ -106,6 +84,18 @@ def main(): ) parser.add_argument("--save-model", action="store_true", default=False, help="For Saving the current Model") args = parser.parse_args() + + wandb.login() + os.makedirs(os.path.join(PLR_ROOT_DIR, "results"), exist_ok=True) + run = wandb.init( + dir=os.path.join(PLR_ROOT_DIR, "results"), + project="plr-project", + config=args, + settings=wandb.Settings(code_dir=PLR_ROOT_DIR), + ) + include_fn = lambda path, root: path.endswith(".py") or path.endswith(".yaml") + run.log_code(name="source_files", root=PLR_ROOT_DIR, include_fn=include_fn) + use_cuda = not args.no_cuda and torch.cuda.is_available() torch.manual_seed(args.seed) @@ -140,6 +130,8 @@ def main(): if args.save_model: torch.save(model.state_dict(), "mnist_cnn.pt") + wandb.finish() + if __name__ == "__main__": main() From 3f056ac78f51b81aa499df6dae8465fd87496a0c Mon Sep 17 00:00:00 2001 From: Kappi Patterson Date: Mon, 4 Mar 2024 22:36:00 +0100 Subject: [PATCH 6/7] moving cnn.py out of train.py --- plr_exercise/models/__init__.py | 1 + plr_exercise/models/cnn.py | 31 +++++++++++++++++++++++++++++++ scripts/train.py | 29 +---------------------------- 3 files changed, 33 insertions(+), 28 deletions(-) create mode 100644 plr_exercise/models/__init__.py create mode 100644 plr_exercise/models/cnn.py diff --git a/plr_exercise/models/__init__.py b/plr_exercise/models/__init__.py new file mode 100644 index 0000000..450aad0 --- /dev/null +++ b/plr_exercise/models/__init__.py @@ -0,0 +1 @@ +from .cnn import Net diff --git a/plr_exercise/models/cnn.py b/plr_exercise/models/cnn.py new file mode 100644 index 0000000..c48bf57 --- /dev/null +++ b/plr_exercise/models/cnn.py @@ -0,0 +1,31 @@ +from __future__ import print_function +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class Net(nn.Module): + def __init__(self): + + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 32, 3, 1) + self.conv2 = nn.Conv2d(32, 64, 3, 1) + self.dropout1 = nn.Dropout(0.25) + self.dropout2 = nn.Dropout(0.5) + self.fc1 = nn.Linear(9216, 128) + self.fc2 = nn.Linear(128, 10) + + def forward(self, x): + x = self.conv1(x) + x = F.relu(x) + x = self.conv2(x) + x = F.relu(x) + x = F.max_pool2d(x, 2) + x = self.dropout1(x) + x = torch.flatten(x, 1) + x = self.fc1(x) + x = F.relu(x) + x = self.dropout2(x) + x = self.fc2(x) + output = F.log_softmax(x, dim=1) + return output diff --git a/scripts/train.py b/scripts/train.py index b24b50d..bc1c2f3 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -1,38 +1,11 @@ from __future__ import print_function import argparse import torch -import torch.nn as nn import torch.nn.functional as F import torch.optim as optim from torchvision import datasets, transforms from torch.optim.lr_scheduler import StepLR - - -class Net(nn.Module): - def __init__(self): - - super(Net, self).__init__() - self.conv1 = nn.Conv2d(1, 32, 3, 1) - self.conv2 = nn.Conv2d(32, 64, 3, 1) - self.dropout1 = nn.Dropout(0.25) - self.dropout2 = nn.Dropout(0.5) - self.fc1 = nn.Linear(9216, 128) - self.fc2 = nn.Linear(128, 10) - - def forward(self, 
x): - x = self.conv1(x) - x = F.relu(x) - x = self.conv2(x) - x = F.relu(x) - x = F.max_pool2d(x, 2) - x = self.dropout1(x) - x = torch.flatten(x, 1) - x = self.fc1(x) - x = F.relu(x) - x = self.dropout2(x) - x = self.fc2(x) - output = F.log_softmax(x, dim=1) - return output +from plr_exercise.models import Net def train(args, model, device, train_loader, optimizer, epoch): From 584528733e8ae83918d5aa5bcc49312fc55aaf9e Mon Sep 17 00:00:00 2001 From: Kappi Patterson Date: Tue, 5 Mar 2024 00:45:48 +0100 Subject: [PATCH 7/7] Adding a param sweep with Optuna --- scripts/train.py | 47 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/scripts/train.py b/scripts/train.py index 4f7a73e..e29ebda 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -9,6 +9,7 @@ from plr_exercise import PLR_ROOT_DIR import wandb import os +import optuna def train(args, model, device, train_loader, optimizer, epoch): @@ -58,6 +59,7 @@ def test(model, device, test_loader, epoch): ) ) wandb.log({"test_loss": test_loss, "epoch": epoch}) + return test_loss def main(): @@ -70,7 +72,7 @@ def main(): "--test-batch-size", type=int, default=1000, metavar="N", help="input batch size for testing (default: 1000)" ) parser.add_argument("--epochs", type=int, default=2, metavar="N", help="number of epochs to train (default: 14)") - parser.add_argument("--lr", type=float, default=1.0, metavar="LR", help="learning rate (default: 1.0)") + # parser.add_argument("--lr", type=float, default=1.0, metavar="LR", help="learning rate (default: 1.0)") parser.add_argument("--gamma", type=float, default=0.7, metavar="M", help="Learning rate step gamma (default: 0.7)") parser.add_argument("--no-cuda", action="store_true", default=False, help="disables CUDA training") parser.add_argument("--dry-run", action="store_true", default=False, help="quickly check a single pass") @@ -111,24 +113,43 @@ def main(): cuda_kwargs = {"num_workers": 1, "pin_memory": True, "shuffle": True} train_kwargs.update(cuda_kwargs) test_kwargs.update(cuda_kwargs) - transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]) dataset1 = datasets.MNIST("../data", train=True, download=True, transform=transform) dataset2 = datasets.MNIST("../data", train=False, transform=transform) train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs) test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs) - model = Net().to(device) - optimizer = optim.Adam(model.parameters(), lr=args.lr) - - scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma) - for epoch in range(args.epochs): - train(args, model, device, train_loader, optimizer, epoch) - test(model, device, test_loader, epoch) - scheduler.step() - - if args.save_model: - torch.save(model.state_dict(), "mnist_cnn.pt") + def objective(trial): + # Optuna variables + lr = trial.suggest_float("lr", 1e-4, 1e-1, log=True) + # epochs = trial.suggest_int("epochs", 1, 3) + gamma = trial.suggest_float("gamma", 0.5, 0.9) + + model = Net().to(device) + optimizer = optim.Adam(model.parameters(), lr=lr) + scheduler = StepLR(optimizer, step_size=1, gamma=gamma) + + # Training loop + for epoch in range(args.epochs): + train(args, model, device, train_loader, optimizer, epoch) + test_loss = test(model, device, test_loader, epoch) + scheduler.step() + + return test_loss + + study = optuna.create_study(direction="minimize") + study.optimize(objective, n_trials=5) + + # Save or print the best hyperparameters + print("Best
trial:") + trial = study.best_trial + print(f" Value: {trial.value}") + print(" Params: ") + for key, value in trial.params.items(): + print(f" {key}: {value}") + + # if args.save_model: + # torch.save(model.state_dict(), "mnist_cnn.pt") wandb.finish()