diff --git a/dataloader.py b/dataloader.py
new file mode 100644
index 0000000..2fc9dd2
--- /dev/null
+++ b/dataloader.py
@@ -0,0 +1,83 @@
+import os
+from torch.utils.data import Dataset, DataLoader
+import numpy as np
+
+# Libraries used for preprocessing
+import cv2
+from pycocotools.coco import COCO
+# import torchvision
+# import torchvision.transforms as transforms
+# import albumentations as A
+# from albumentations.pytorch import ToTensorV2
+
+def get_classname(classID, cats):
+    for cat in cats:
+        if cat['id'] == classID:
+            return cat['name']
+    return "None"
+
+class CustomDataLoader(Dataset):
+    """COCO format"""
+    def __init__(self, data_dir, mode='train', transform=None):
+        super().__init__()
+        self.mode = mode
+        self.transform = transform
+        self.coco = COCO(data_dir)
+        self.dataset_path = '../input/data/'
+        self.category_names = ['Background', 'UNKNOWN', 'General trash', 'Paper', 'Paper pack', 'Metal', 'Glass', 'Plastic', 'Styrofoam', 'Plastic bag', 'Battery', 'Clothing']
+
+    def __getitem__(self, index: int):
+        # The dataset is indexed, so it behaves like a list
+        image_id = self.coco.getImgIds(imgIds=index)
+        image_infos = self.coco.loadImgs(image_id)[0]
+
+        # Load the image with cv2
+        images = cv2.imread(os.path.join(self.dataset_path, image_infos['file_name']))
+        images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB).astype(np.uint8)
+        # images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB).astype(np.float32)
+        # images /= 255.0
+
+        if self.mode in ('train', 'val'):
+            ann_ids = self.coco.getAnnIds(imgIds=image_infos['id'])
+            anns = self.coco.loadAnns(ann_ids)
+
+            # Load the categories in a variable
+            cat_ids = self.coco.getCatIds()
+            cats = self.coco.loadCats(cat_ids)
+
+            # masks : a 2D array of size (height x width)
+            # Each pixel is assigned "category id + 1"
+            # Background = 0, Unknown = 1, General trash = 2, ..., Clothing = 11
+            masks = np.zeros((image_infos["height"], image_infos["width"]))
+            for i in range(len(anns)):
+                className = get_classname(anns[i]['category_id'], cats)
+                pixel_value = self.category_names.index(className)
+                masks = np.maximum(self.coco.annToMask(anns[i]) * pixel_value, masks)
+            masks = masks.astype(np.float32)
+
+            # transform -> apply albumentations transforms
+            if self.transform is not None:
+                transformed = self.transform(image=images, mask=masks)
+                images = transformed["image"]
+                masks = transformed["mask"]
+
+            return images, masks, image_infos
+
+        if self.mode == 'test':
+            # transform -> apply albumentations transforms
+            if self.transform is not None:
+                transformed = self.transform(image=images)
+                images = transformed["image"]
+
+            return images, image_infos
+
+    def __len__(self) -> int:
+        # Return the size of the whole dataset
+        return len(self.coco.getImgIds())
+
+    # collate_fn is needed to batch samples whose image_infos dicts
+    # cannot be handled by the default collate
+    @staticmethod
+    def collate_fn(batch):
+        return tuple(zip(*batch))
\ No newline at end of file
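
A minimal usage sketch for CustomDataLoader, assuming the ../input/data layout and the transform conventions from train.py (Resize -> Normalize -> tensor):

    import albumentations as A
    from albumentations.pytorch import ToTensorV2
    from torch.utils.data import DataLoader

    from dataloader import CustomDataLoader

    transform = A.Compose([
        A.Resize(256, 256),
        A.Normalize(max_pixel_value=255.0),
        ToTensorV2(),
    ])

    # '../input/data/train.json' follows the path convention assumed in train.py
    dataset = CustomDataLoader(data_dir='../input/data/train.json', mode='train', transform=transform)
    loader = DataLoader(dataset, batch_size=8, shuffle=True,
                        collate_fn=CustomDataLoader.collate_fn)

    images, masks, infos = next(iter(loader))  # tuples of length batch_size
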
diff --git a/evaluate.py b/evaluate.py
new file mode 100644
index 0000000..a405f03
--- /dev/null
+++ b/evaluate.py
@@ -0,0 +1,143 @@
+# https://github.com/wkentaro/pytorch-fcn/blob/master/torchfcn/utils.py
+import numpy as np
+import torch
+from my_utils import *
+
+
+def validation(epoch, model, data_loader, criterion, device, n_class=12):
+    print('Start validation #{}'.format(epoch))
+    model.eval()
+    with torch.no_grad():
+        total_loss = 0
+        cnt = 0
+        # mIoU_list = []
+        hist = np.zeros((n_class, n_class))
+        for step, (images, masks, _) in enumerate(data_loader):
+
+            images = torch.stack(images)       # (batch, channel, height, width)
+            masks = torch.stack(masks).long()  # (batch, height, width)
+
+            images, masks = images.to(device), masks.to(device)
+
+            outputs = model(images)
+            loss = criterion(outputs, masks)
+            total_loss += loss
+            cnt += 1
+
+            outputs = torch.argmax(outputs, dim=1).detach().cpu().numpy()
+
+            hist = add_hist(hist, masks.detach().cpu().numpy(), outputs, n_class=n_class)
+            # mIoU = label_accuracy_score(masks.detach().cpu().numpy(), outputs, n_class=12)[2]
+            # mIoU_list.append(mIoU)
+
+        acc, acc_cls, mean_iu, fwavacc = label_accuracy_score2(hist)
+        avrg_loss = total_loss / cnt
+        print('Validation #{} Average Loss: {:.4f}, mIoU: {:.4f}, acc: {:.4f}, acc_cls: {:.4f}'.format(epoch, avrg_loss, mean_iu, acc, acc_cls))
+
+    return avrg_loss, mean_iu
+
+
+def validation3(epoch, model, data_loader, criterion, device, n_class=12):
+    print('Start validation #{}'.format(epoch))
+    model.eval()
+    with torch.no_grad():
+        total_loss = 0
+        cnt = 0
+        mIoU_list = []
+        hist = np.zeros((n_class, n_class))
+        all_iou = []
+        for step, (images, masks, _) in enumerate(data_loader):
+
+            images = torch.stack(images)       # (batch, channel, height, width)
+            masks = torch.stack(masks).long()  # (batch, height, width)
+
+            images, masks = images.to(device), masks.to(device)
+
+            outputs = model(images)
+            loss = criterion(outputs, masks)
+            total_loss += loss
+            cnt += 1
+
+            outputs = torch.argmax(outputs, dim=1).detach().cpu().numpy()
+
+            hist = add_hist(hist, masks.detach().cpu().numpy(), outputs, n_class=n_class)
+
+            mIoU = label_accuracy_score(masks.detach().cpu().numpy(), outputs, n_class=n_class)
+            mIoU_list.append(mIoU)
+
+            batch_iou = batch_iou_score(masks.detach().cpu().numpy(), outputs, len(outputs))
+            all_iou.append(batch_iou)
+
+        avrg_loss = total_loss / cnt
+        miou2 = mIoU_score(hist)
+        miou3 = np.mean(all_iou)
+        print('Validation #{} Average Loss: {:.4f}, mIoU2: {:.4f}, mIoU3: {:.4f}'.format(epoch, avrg_loss, miou2, miou3))
+
+    return avrg_loss, np.mean(mIoU_list), miou2, miou3
+
+
+def _fast_hist(label_true, label_pred, n_class):
+    mask = (label_true >= 0) & (label_true < n_class)
+    hist = np.bincount(
+        n_class * label_true[mask].astype(int) +
+        label_pred[mask], minlength=n_class ** 2).reshape(n_class, n_class)
+    return hist
+
+
+def label_accuracy_score(label_trues, label_preds, n_class=12):
+    hist = np.zeros((n_class, n_class))
+    for lt, lp in zip(label_trues, label_preds):
+        hist += _fast_hist(lt.flatten(), lp.flatten(), n_class)
+    with np.errstate(divide='ignore', invalid='ignore'):
+        iu = np.diag(hist) / (
+            hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist)
+        )
+    mean_iu = np.nanmean(iu)
+    return mean_iu
+
+
+def label_accuracy_score2(hist):
+    """
+    Returns accuracy score evaluation result.
+      - [acc]: overall accuracy
+      - [acc_cls]: mean per-class accuracy
+      - [mean_iu]: mean IU
+      - [fwavacc]: frequency-weighted average accuracy
+    """
+    acc = np.diag(hist).sum() / hist.sum()
+    with np.errstate(divide='ignore', invalid='ignore'):
+        acc_cls = np.diag(hist) / hist.sum(axis=1)
+    acc_cls = np.nanmean(acc_cls)
+
+    with np.errstate(divide='ignore', invalid='ignore'):
+        iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))
+    mean_iu = np.nanmean(iu)
+
+    freq = hist.sum(axis=1) / hist.sum()
+    fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()
+    return acc, acc_cls, mean_iu, fwavacc
+
+
+def add_hist(hist, label_trues, label_preds, n_class):
+    for lt, lp in zip(label_trues, label_preds):
+        hist += _fast_hist(lt.flatten(), lp.flatten(), n_class)
+    return hist
+
+
+def batch_iou_score(label_trues, label_preds, batch_size, n_class=12):
+    batch_iou = 0
+    for lt, lp in zip(label_trues, label_preds):
+        hist = _fast_hist(lt.flatten(), lp.flatten(), n_class)
+        with np.errstate(divide='ignore', invalid='ignore'):
+            iu = np.diag(hist) / (
+                hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist)
+            )
+        batch_iou += np.nanmean(iu) / batch_size
+    return batch_iou
+
+
+def mIoU_score(hist):
+    with np.errstate(divide='ignore', invalid='ignore'):
+        iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))
+    mean_iu = np.nanmean(iu)
+    return mean_iu
\ No newline at end of file
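
A quick sanity check for the histogram-based metrics, using hypothetical 2-class toy masks:

    import numpy as np
    from evaluate import add_hist, label_accuracy_score2, mIoU_score

    n_class = 2
    truth = np.array([[0, 0], [1, 1]])   # toy ground-truth mask
    pred  = np.array([[0, 1], [1, 1]])   # toy prediction: one background pixel wrong

    hist = add_hist(np.zeros((n_class, n_class)), truth[None], pred[None], n_class)
    acc, acc_cls, mean_iu, fwavacc = label_accuracy_score2(hist)
    print(acc, mean_iu)   # acc = 0.75; mean_iu = mean of IoU(bg) = 1/2 and IoU(cls1) = 2/3
    assert np.isclose(mean_iu, mIoU_score(hist))
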
diff --git a/my_utils.py b/my_utils.py
new file mode 100644
index 0000000..c69c2e6
--- /dev/null
+++ b/my_utils.py
@@ -0,0 +1,35 @@
+import os
+import torch
+
+val_every = 1
+
+saved_dir = './saved'
+if not os.path.isdir(saved_dir):
+    os.mkdir(saved_dir)
+
+
+def save_model(model, saved_dir, file_name='default.pt'):
+    # Save the bare state_dict so load_model can restore it directly
+    output_path = os.path.join(saved_dir, file_name)
+    torch.save(model.state_dict(), output_path)
+
+
+def load_model(model, device, saved_dir, file_name='default.pt'):
+    model_path = os.path.join(saved_dir, file_name)
+    checkpoint = torch.load(model_path, map_location=device)
+    model.load_state_dict(checkpoint)
+
+
+def calculate_parameter(model, print_param=False):
+    n_param = 0
+    n_conv = 0
+    for p_idx, (param_name, param) in enumerate(model.named_parameters()):
+        if param.requires_grad:
+            param_numpy = param.detach().cpu().numpy()  # to numpy array
+            n_param += len(param_numpy.reshape(-1))
+            if print_param:
+                print("[%d] name:[%s] shape:[%s]." % (p_idx, param_name, param_numpy.shape))
+            if "conv" in param_name:
+                n_conv += 1
+    print("-" * 50 + f"\nTotal number of parameters: [{n_param:,d}]\n" + "-" * 50)
+    print(f"Total number of Conv layers: {n_conv}")
\ No newline at end of file
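
A round-trip sketch for the checkpoint helpers; nn.Linear stands in for the real segmentation model:

    import torch.nn as nn
    from my_utils import save_model, load_model, calculate_parameter

    model = nn.Linear(4, 2)        # hypothetical stand-in for the real model
    calculate_parameter(model)     # prints parameter and conv-layer counts

    save_model(model, './saved', file_name='demo.pt')
    restored = nn.Linear(4, 2)
    load_model(restored, device='cpu', saved_dir='./saved', file_name='demo.pt')
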
diff --git a/scheduler.py b/scheduler.py
new file mode 100644
index 0000000..789d255
--- /dev/null
+++ b/scheduler.py
@@ -0,0 +1,88 @@
+import math
+import torch
+from torch.optim.lr_scheduler import _LRScheduler
+
+
+class CosineAnnealingWarmupRestarts(_LRScheduler):
+    """
+    optimizer (Optimizer): Wrapped optimizer.
+    first_cycle_steps (int): First cycle step size.
+    cycle_mult (float): Cycle steps magnification. Default: 1.
+    max_lr (float): First cycle's max learning rate. Default: 0.1.
+    min_lr (float): Min learning rate. Default: 0.001.
+    warmup_steps (int): Linear warmup step size. Default: 0.
+    gamma (float): Decrease rate of max learning rate by cycle. Default: 1.
+    last_epoch (int): The index of the last epoch. Default: -1.
+    """
+
+    def __init__(self,
+                 optimizer: torch.optim.Optimizer,
+                 first_cycle_steps: int,
+                 cycle_mult: float = 1.,
+                 max_lr: float = 0.1,
+                 min_lr: float = 0.001,
+                 warmup_steps: int = 0,
+                 gamma: float = 1.,
+                 last_epoch: int = -1
+                 ):
+        assert warmup_steps < first_cycle_steps
+
+        self.first_cycle_steps = first_cycle_steps  # first cycle step size
+        self.cycle_mult = cycle_mult                # cycle steps magnification
+        self.base_max_lr = max_lr                   # first max learning rate
+        self.max_lr = max_lr                        # max learning rate in the current cycle
+        self.min_lr = min_lr                        # min learning rate
+        self.warmup_steps = warmup_steps            # warmup step size
+        self.gamma = gamma                          # decrease rate of max learning rate by cycle
+
+        self.cur_cycle_steps = first_cycle_steps    # step size of the current cycle
+        self.cycle = 0                              # cycle count
+        self.step_in_cycle = last_epoch             # step index within the current cycle
+
+        super(CosineAnnealingWarmupRestarts, self).__init__(optimizer, last_epoch)
+
+        # set learning rate to min_lr
+        self.init_lr()
+
+    def init_lr(self):
+        self.base_lrs = []
+        for param_group in self.optimizer.param_groups:
+            param_group['lr'] = self.min_lr
+            self.base_lrs.append(self.min_lr)
+
+    def get_lr(self):
+        if self.step_in_cycle == -1:
+            return self.base_lrs
+        elif self.step_in_cycle < self.warmup_steps:
+            return [(self.max_lr - base_lr) * self.step_in_cycle / self.warmup_steps + base_lr
+                    for base_lr in self.base_lrs]
+        else:
+            return [base_lr + (self.max_lr - base_lr)
+                    * (1 + math.cos(math.pi * (self.step_in_cycle - self.warmup_steps)
+                                    / (self.cur_cycle_steps - self.warmup_steps))) / 2
+                    for base_lr in self.base_lrs]
+
+    def step(self, epoch=None):
+        if epoch is None:
+            epoch = self.last_epoch + 1
+            self.step_in_cycle = self.step_in_cycle + 1
+            if self.step_in_cycle >= self.cur_cycle_steps:
+                self.cycle += 1
+                self.step_in_cycle = self.step_in_cycle - self.cur_cycle_steps
+                self.cur_cycle_steps = int((self.cur_cycle_steps - self.warmup_steps) * self.cycle_mult) + self.warmup_steps
+        else:
+            if epoch >= self.first_cycle_steps:
+                if self.cycle_mult == 1.:
+                    self.step_in_cycle = epoch % self.first_cycle_steps
+                    self.cycle = epoch // self.first_cycle_steps
+                else:
+                    n = int(math.log((epoch / self.first_cycle_steps * (self.cycle_mult - 1) + 1), self.cycle_mult))
+                    self.cycle = n
+                    self.step_in_cycle = epoch - int(self.first_cycle_steps * (self.cycle_mult ** n - 1) / (self.cycle_mult - 1))
+                    self.cur_cycle_steps = self.first_cycle_steps * self.cycle_mult ** (n)
+            else:
+                self.cur_cycle_steps = self.first_cycle_steps
+                self.step_in_cycle = epoch
+
+        self.max_lr = self.base_max_lr * (self.gamma ** self.cycle)
+        self.last_epoch = math.floor(epoch)
+        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
+            param_group['lr'] = lr
\ No newline at end of file
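
A short sketch of how the warmup-restart schedule evolves, stepping once per batch the way train.py does; the hyperparameters match the commented-out line in train.py:

    import torch
    from scheduler import CosineAnnealingWarmupRestarts

    model = torch.nn.Linear(4, 2)   # stand-in model
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    scheduler = CosineAnnealingWarmupRestarts(
        optimizer, first_cycle_steps=500, max_lr=5e-5, min_lr=5e-7, warmup_steps=100)

    lrs = []
    for step in range(1000):        # two cycles of 500 steps
        optimizer.step()
        scheduler.step()
        lrs.append(optimizer.param_groups[0]['lr'])

    print(max(lrs), min(lrs))       # peaks near max_lr, dips back toward min_lr
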
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..179733e
--- /dev/null
+++ b/train.py
@@ -0,0 +1,267 @@
+#%%
+
+import os
+import random
+import time
+import json
+import wandb
+import warnings
+warnings.filterwarnings('ignore')
+
+import numpy as np
+import pandas as pd
+
+import torch
+import torch.nn as nn
+from torch.utils.data import Dataset, DataLoader
+import segmentation_models_pytorch as smp
+
+from tqdm import tqdm
+
+from pycocotools.coco import COCO
+import cv2
+import torchvision
+import torchvision.transforms as transforms
+import albumentations as A
+from albumentations.pytorch import ToTensorV2
+
+from adamp import AdamP
+
+import matplotlib.pyplot as plt
+# from natsort import natsorted
+from torch.cuda.amp import GradScaler, autocast
+
+from transformers import get_cosine_with_hard_restarts_schedule_with_warmup
+from unet import UNet3Plus, UNet3Plus_DeepSup, UNet3Plus_DeepSup_CGM, UNet3Plus_efficientnet_DeepSup_CGM, UNet3Plus_efficientnet, UNet3Plus_resnext50_32x4d
+from unet.efficientunet import *
+from unet.efficientnet import *
+import timm
+
+from my_utils import *
+from dataloader import *
+# from loss import *
+from scheduler import *
+from evaluate import *
+
+
+def collate_fn(batch):
+    return tuple(zip(*batch))
+
+
+def train():
+    wandb.init()
+
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+
+    batch_size = 8      # mini-batch size
+    num_epochs = 20
+    learning_rate = 5e-5
+
+    # Fix the random seed
+    random_seed = 77
+    torch.manual_seed(random_seed)
+    torch.cuda.manual_seed(random_seed)
+    # torch.cuda.manual_seed_all(random_seed)  # if using multi-GPU
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+    np.random.seed(random_seed)
+    random.seed(random_seed)
+
+    # Paths to train.json / val.json / test.json
+    dataset_path = '../input/data'
+    train_path = dataset_path + '/train.json'
+    val_path = dataset_path + '/val.json'
+
+    # mean and std computed over train_all.json
+    mean = (0.460, 0.440, 0.418)
+    std = (0.211, 0.208, 0.216)
+
+    train_transform = A.Compose([
+        A.Resize(256, 256),
+        # A.HorizontalFlip(p=0.5),
+        # A.VerticalFlip(p=0.5),
+        # A.RandomRotate90(p=0.5),
+        # A.CLAHE(p=0.5),
+        A.Normalize(mean=mean, std=std, max_pixel_value=255.0, p=1.0),
+        ToTensorV2()
+    ])
+
+    val_transform = A.Compose([
+        A.Resize(256, 256),
+        # A.CLAHE(p=1.0),
+        A.Normalize(mean=mean, std=std, max_pixel_value=255.0, p=1.0),
+        ToTensorV2()
+    ])
+
+    train_dataset = CustomDataLoader(data_dir=train_path, mode='train', transform=train_transform)
+    val_dataset = CustomDataLoader(data_dir=val_path, mode='val', transform=val_transform)
+
+    # DataLoader
+    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
+                                               batch_size=batch_size,
+                                               shuffle=True,
+                                               num_workers=4,
+                                               collate_fn=collate_fn,
+                                               drop_last=True)
+
+    val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
+                                             batch_size=batch_size,
+                                             shuffle=False,
+                                             num_workers=4,
+                                             collate_fn=collate_fn)
+
+    # saved_dir
+    val_every = 1
+    saved_dir = './models'
+    if not os.path.isdir(saved_dir):
+        os.mkdir(saved_dir)
+
+    # model
+    # model = smp.Unet(
+    #     encoder_name='timm-efficientnet-b5',
+    #     encoder_weights='noisy-student',
+    #     classes=12
+    # )
+    # model = smp.DeepLabV3Plus(
+    #     encoder_name='timm_-agenet',
+    #     classes=12
+    # )
+    # model = smp.UnetPlusPlus(
+    #     encoder_name='timm-efficientnet-b0',
+    #     encoder_weights='noisy-student',
+    #     classes=12
+    # )
+
+    # encoder = EfficientNet.encoder('efficientnet-b5', pretrained=True)
+    # model = UNet3Plus_efficientnet(encoder, n_classes=12)
+
+    # encoder = timm.create_model('swsl_resnext50_32x4d', pretrained=True)
+    # model = UNet3Plus_resnext50_32x4d(encoder, n_classes=12)
+    model = UNet3Plus_resnext50_32x4d(n_classes=12)
+    model.to(device)
+    wandb.watch(model)
+
+    calculate_parameter(model)
+
+    # Inspect the output of train_loader (image and mask)
+    # for imgs, masks, image_infos in train_loader:
+    #     image_infos = image_infos[0]
+    #     temp_images = imgs
+    #     temp_masks = masks
+    #     break
+
+    # fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 12))
+
+    # print('image shape:', list(temp_images[2].shape))
+    # print('mask shape: ', list(temp_masks[2].shape))
+    # # print('Unique values, category of transformed mask : \n', [{int(i),category_names[int(i)]} for i in list(np.unique(temp_masks[0]))])
+
+    # ax1.imshow(temp_images[2].permute([1,2,0]))
+    # ax1.grid(False)
+    # ax1.set_title("input image : {}".format(image_infos['file_name']), fontsize = 15)
+
+    # ax2.imshow(temp_masks[2])
+    # ax2.grid(False)
+    # ax2.set_title("masks : {}".format(image_infos['file_name']), fontsize = 15)
+
+    # plt.show()
+    # return 0
+
+    # train
+    criterion = nn.CrossEntropyLoss()
+    optimizer = AdamP(model.parameters(), lr=learning_rate)
+    # optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
+    # scheduler = CosineAnnealingWarmupRestarts(optimizer, first_cycle_steps=500, max_lr=5e-5, min_lr=5e-7, warmup_steps=100)
+    # scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(optimizer, 300, 6540, 3)
+    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 10000)
+
+    # scaler = GradScaler()
+    print('Start training..')
+    best_loss = 9999999
+    best_mIoU = 0.0
+    for epoch in range(num_epochs):
+
+        model.train()
+
+        for step, (images, masks, _) in tqdm(enumerate(train_loader)):
+            images = torch.stack(images)       # (batch, channel, height, width)
+            masks = torch.stack(masks).long()  # (batch, height, width)
+
+            # Move tensors to the GPU
+            images, masks = images.to(device), masks.to(device)
+
+            optimizer.zero_grad()
+            # with autocast():
+            # inference
+            outputs = model(images)
+            # loss computation (cross-entropy loss)
+            loss = criterion(outputs, masks)
+
+            # scaler.scale(loss).backward()
+            # scaler.step(optimizer)
+            # scaler.update()
+
+            loss.backward()
+            optimizer.step()
+
+            # Print the loss every 25 steps
+            if (step + 1) % 25 == 0:
+                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, LR : {:.6f}'.format(
+                    epoch+1, num_epochs, step+1, len(train_loader), loss.item(), scheduler.get_lr()[0]))
+                # epoch+1, num_epochs, step+1, len(train_loader), loss.item(), learning_rate))
+                wandb.log({'LR': scheduler.get_lr()[0]})
+            scheduler.step()
+
+        # Run validation every val_every epochs and keep the best model
+        if (epoch + 1) % val_every == 0:
+            val_loss, _, val_mIoU, val_mIoU2 = validation3(epoch + 1, model, val_loader, criterion, device)
+            wandb.log({"train_loss": loss.item(), "val_loss": val_loss, "val_mIoU": val_mIoU, "val_mIoU2": val_mIoU2})
+            # if avrg_loss < best_loss:
+            #     print('Best performance at epoch: {}'.format(epoch + 1))
+            #     print('Save model in', saved_dir)
+            #     best_loss = avrg_loss
+            #     save_model(model, saved_dir)
+            if best_mIoU < val_mIoU2:
+                print('Best performance at epoch: {}'.format(epoch + 1))
+                print('Save model in', saved_dir)
+                best_mIoU = val_mIoU2
+                save_model(model, saved_dir, file_name='UNet3Plus_resnext50_32x4d.pt')
+
+    print('finish')
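
If the mixed-precision path that the diff leaves commented out (GradScaler / autocast) were restored, the inner loop of train() would look roughly like this; a sketch, assuming the model, criterion, optimizer, device, and train_loader defined above:

    from torch.cuda.amp import GradScaler, autocast

    scaler = GradScaler()

    for step, (images, masks, _) in enumerate(train_loader):
        images = torch.stack(images).to(device)
        masks = torch.stack(masks).long().to(device)

        optimizer.zero_grad()
        with autocast():                  # forward pass in mixed precision
            outputs = model(images)
            loss = criterion(outputs, masks)

        scaler.scale(loss).backward()     # scale loss to avoid fp16 underflow
        scaler.step(optimizer)            # unscale gradients, then optimizer step
        scaler.update()
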
+
+
+def main():
+    train()
+    # project_name = 'se_resnext50_32x4d'
+    # count = 20
+    # sweep_config = {
+    #     'method': 'bayes'
+    # }
+    # metric = {
+    #     'name': 'val_mIoU',
+    #     'goal': 'maximize'
+    # }
+    # sweep_config['metric'] = metric
+
+    # parameters_dict = {
+    #     'BATCH_SIZE': {
+    #         'values': [8, 16]
+    #     },
+    #     'LR': {
+    #         'values': [1e-5, 5e-6, 1e-6]
+    #     },
+    #     'project_name': {
+    #         'value': project_name
+    #     },
+    # }
+    # sweep_config['parameters'] = parameters_dict
+
+    # sweep_id = wandb.sweep(sweep_config, project=project_name)
+    # wandb.agent(sweep_id, train, count=count)
+
+
+if __name__ == '__main__':
+    main()
+# %%
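
For reference, wiring the commented-out sweep configuration into wandb would look roughly like this; a sketch with the project name and search space taken from the comments above, and note that train() would still need to read BATCH_SIZE and LR from wandb.config for the sweep to have any effect:

    import wandb
    from train import train

    sweep_config = {
        'method': 'bayes',
        'metric': {'name': 'val_mIoU', 'goal': 'maximize'},
        'parameters': {
            'BATCH_SIZE': {'values': [8, 16]},
            'LR': {'values': [1e-5, 5e-6, 1e-6]},
        },
    }

    # Each agent run calls train() once with a sampled configuration
    sweep_id = wandb.sweep(sweep_config, project='se_resnext50_32x4d')
    wandb.agent(sweep_id, train, count=20)
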