diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a1709a9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+results/*
+*.pyc
+plr_exercise.egg-info/*
\ No newline at end of file
diff --git a/plr_exercise/models/__init__.py b/plr_exercise/models/__init__.py
new file mode 100644
index 0000000..450aad0
--- /dev/null
+++ b/plr_exercise/models/__init__.py
@@ -0,0 +1 @@
+from .cnn import Net
diff --git a/plr_exercise/models/cnn.py b/plr_exercise/models/cnn.py
new file mode 100644
index 0000000..c48bf57
--- /dev/null
+++ b/plr_exercise/models/cnn.py
@@ -0,0 +1,31 @@
+from __future__ import print_function
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class Net(nn.Module):
+    def __init__(self):
+
+        super(Net, self).__init__()
+        self.conv1 = nn.Conv2d(1, 32, 3, 1)
+        self.conv2 = nn.Conv2d(32, 64, 3, 1)
+        self.dropout1 = nn.Dropout(0.25)
+        self.dropout2 = nn.Dropout(0.5)
+        self.fc1 = nn.Linear(9216, 128)
+        self.fc2 = nn.Linear(128, 10)
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = F.relu(x)
+        x = self.conv2(x)
+        x = F.relu(x)
+        x = F.max_pool2d(x, 2)
+        x = self.dropout1(x)
+        x = torch.flatten(x, 1)
+        x = self.fc1(x)
+        x = F.relu(x)
+        x = self.dropout2(x)
+        x = self.fc2(x)
+        output = F.log_softmax(x, dim=1)
+        return output
diff --git a/timing.py b/scripts/timing.py
similarity index 100%
rename from timing.py
rename to scripts/timing.py
diff --git a/train.py b/scripts/train.py
similarity index 60%
rename from train.py
rename to scripts/train.py
index 2cfae71..e29ebda 100644
--- a/train.py
+++ b/scripts/train.py
@@ -1,49 +1,25 @@
 from __future__ import print_function
 import argparse
 import torch
-import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
 from torchvision import datasets, transforms
 from torch.optim.lr_scheduler import StepLR
+from plr_exercise.models import Net
+from plr_exercise import PLR_ROOT_DIR
+import wandb
+import os
+import optuna
 
 
-class Net(nn.Module):
-    def __init__(self):
-
-
-        super(Net, self).__init__()
-        self.conv1 = nn.Conv2d(1, 32, 3, 1)
-        self.conv2 = nn.Conv2d( 32, 64, 3, 1)
-        self.dropout1 = nn.Dropout(0.25)
-        self.dropout2 = nn.Dropout(0.5)
-        self.fc1 = nn.Linear(9216, 128)
-        self.fc2 = nn.Linear(128, 10)
-
-    def forward(self, x):
-        x = self.conv1(x)
-        x = F.relu(x)
-        x = self.conv2(x)
-        x = F.relu(x)
-        x = F.max_pool2d(x, 2)
-        x = self.dropout1(x)
-        x = torch.flatten(x, 1)
-        x = self.fc1(x)
-        x = F.relu(x)
-        x = self.dropout2(x)
-        x = self.fc2(x)
-        output = F.log_softmax(x, dim=1)
-        return output
-
-
-def train(args, model, device, train_loader, optimizer, epoch):
+def train(args, model, device, train_loader, optimizer, epoch):
     model.train()
     for batch_idx, (data, target) in enumerate(train_loader):
-        data, target = data.to(device), target.to(device)
+        data, target = data.to(device), target.to(device)
         optimizer.zero_grad()
         output = model(data)
-        loss = F.nll_loss( output, target)
+        loss = F.nll_loss(output, target)
         loss.backward()
         optimizer.step()
         if batch_idx % args.log_interval == 0:
@@ -52,10 +28,11 @@ def train(args, model, device, train_loader, optimizer, epoch):
                     epoch,
                     batch_idx * len(data),
                     len(train_loader.dataset),
-                    100.0 * batch_idx / len(train_loader),
+                    100.0 * batch_idx / len(train_loader),
                     loss.item(),
                 )
             )
+            wandb.log({"epoch": epoch, "train_loss": loss.item()})
             if args.dry_run:
                 break
 
@@ -70,17 +47,19 @@ def test(model, device, test_loader, epoch):
             data, target = data.to(device), target.to(device)
             output = model(data)
-            test_loss += F.nll_loss( output, target, reduction="sum").item()  # sum up batch loss
+            test_loss += F.nll_loss(output, target, reduction="sum").item()  # sum up batch loss
             pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
-            correct += pred.eq(target.view_as(pred) ).sum().item()
+            correct += pred.eq(target.view_as(pred)).sum().item()
 
-    test_loss /= len(test_loader.dataset )
+    test_loss /= len(test_loader.dataset)
 
     print(
         "\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
            test_loss, correct, len(test_loader.dataset), 100.0 * correct / len(test_loader.dataset)
        )
    )
+    wandb.log({"test_loss": test_loss, "epoch": epoch})
+    return test_loss
 
 
 def main():
@@ -93,7 +72,7 @@ def main():
         "--test-batch-size", type=int, default=1000, metavar="N", help="input batch size for testing (default: 1000)"
     )
     parser.add_argument("--epochs", type=int, default=2, metavar="N", help="number of epochs to train (default: 14)")
-    parser.add_argument("--lr", type=float, default=1.0, metavar="LR", help="learning rate (default: 1.0)")
+    # parser.add_argument("--lr", type=float, default=1.0, metavar="LR", help="learning rate (default: 1.0)")
     parser.add_argument("--gamma", type=float, default=0.7, metavar="M", help="Learning rate step gamma (default: 0.7)")
     parser.add_argument("--no-cuda", action="store_true", default=False, help="disables CUDA training")
     parser.add_argument("--dry-run", action="store_true", default=False, help="quickly check a single pass")
@@ -107,6 +86,18 @@ def main():
     )
     parser.add_argument("--save-model", action="store_true", default=False, help="For Saving the current Model")
     args = parser.parse_args()
+
+    wandb.login()
+    os.makedirs(os.path.join(PLR_ROOT_DIR, "results"), exist_ok=True)
+    run = wandb.init(
+        dir=os.path.join(PLR_ROOT_DIR, "results"),
+        project="plr-project",
+        config=args,
+        settings=wandb.Settings(code_dir=PLR_ROOT_DIR),
+    )
+    include_fn = lambda path, root: path.endswith(".py") or path.endswith(".yaml")
+    run.log_code(name="source_files", root=PLR_ROOT_DIR, include_fn=include_fn)
+
     use_cuda = not args.no_cuda and torch.cuda.is_available()
 
     torch.manual_seed(args.seed)
@@ -122,24 +113,45 @@ def main():
         cuda_kwargs = {"num_workers": 1, "pin_memory": True, "shuffle": True}
         train_kwargs.update(cuda_kwargs)
         test_kwargs.update(cuda_kwargs)
-
     transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
     dataset1 = datasets.MNIST("../data", train=True, download=True, transform=transform)
     dataset2 = datasets.MNIST("../data", train=False, transform=transform)
     train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
     test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
 
-    model = Net().to(device)
-    optimizer = optim.Adam(model.parameters(), lr=args.lr)
+    def objective(trial):
+        # Optuna variables
+        lr = trial.suggest_float("lr", 1e-4, 1e-1, log=True)
+        # epochs = trial.suggest_int("epochs", 1, 3)
+        gamma = trial.suggest_float("gamma", 0.5, 0.9)
+
+        model = Net().to(device)
+        optimizer = optim.Adam(model.parameters(), lr=lr)
+        scheduler = StepLR(optimizer, step_size=1, gamma=gamma)
+
+        # Training loop
+        for epoch in range(args.epochs):
+            train(args, model, device, train_loader, optimizer, epoch)
+            test_loss = test(model, device, test_loader, epoch)
+            scheduler.step()
+
+        return test_loss
+
+    study = optuna.create_study(direction="minimize")
+    study.optimize(objective, n_trials=5)
+
+    # Save or print the best hyperparameters
+    print("Best trial:")
+    trial = study.best_trial
+    print(f"  Value: {trial.value}")
+    print("  Params: ")
+    for key, value in trial.params.items():
+        print(f"    {key}: {value}")
 
-    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
-    for epoch in range(args.epochs):
-        train(args, model, device, train_loader, optimizer, epoch)
-        test(model, device, test_loader, epoch)
-        scheduler.step()
+    # if args.save_model:
+    #     torch.save(model.state_dict(), "mnist_cnn.pt")
 
-    if args.save_model:
-        torch.save(model.state_dict(), "mnist_cnn.pt")
+    wandb.finish()
 
 
 if __name__ == "__main__":