diff --git "a/4.27 backbone \354\213\244\355\227\230.md" "b/4.27 backbone \354\213\244\355\227\230.md"
new file mode 100644
index 0000000..c0d4b04
--- /dev/null
+++ "b/4.27 backbone \354\213\244\355\227\230.md"
@@ -0,0 +1,23 @@
+## Submission list
+|Model|Batch|Seed|time/step|epoch|loss|val_loss|val_mIoU|LB score|
+|------|---|---|---|---|---|---|---|---|
+|DeepLabV3+, resnext50|16|77|4.8s|10|0.108|0.283|0.463|0.5754|
+|DeepLabV3+, seresnext50|16|77|6.8s|16|0.057|0.280|0.476|0.5670|
+|DeepLabV3+, resnext50|8|77|2.4s|9|0.098|0.294|0.451|0.5795|
+
+The remaining configurations were too slow, so they were stopped after one or two epochs.
+
+
+
+### Notes
+- Scheduler: CosineAnnealingWarmUpRestarts (start LR 2e-6, eta_max 1e-4, T_0=20, T_up=2)
+- With batch_size=8, a size-related error occurred on the last (327th) batch
+  - Worked around with drop_last=True in train_loader; root cause unconfirmed (plausibly the final batch holds a single image, which breaks BatchNorm in the model's global-pooling branch during training)
+- DeepLabV3+ has fewer parameters and a faster runtime than DeepLabV3
+- Tried swapping only the encoder of an smp model for a timm model, but even for the same architecture the weight values were completely different (still planning to try it) — see the sketch below
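+
+A minimal sketch (assuming `timm` is installed) of how to check the weight mismatch above; note that recent smp versions can also load timm encoders directly via the `tu-` prefix (e.g. `encoder_name='tu-resnext50_32x4d'`):
+
+```python
+import torch
+import timm
+import segmentation_models_pytorch as smp
+
+# Load the same backbone from each library with its own 'imagenet' weights
+smp_encoder = smp.DeepLabV3Plus(encoder_name='resnext50_32x4d',
+                                encoder_weights='imagenet', classes=12).encoder
+timm_model = timm.create_model('resnext50_32x4d', pretrained=True)
+
+# False here means the two libraries ship different pretrained weights
+print(torch.allclose(smp_encoder.conv1.weight, timm_model.conv1.weight))
+```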
+
+
+
+### Runtime per step (batch size 16)
+- PSPNet : eff-b0(3.2s), eff-b3(3.5s), resnet34(6.0s)
+- DeepLabV3+ : resnext50(4.8s), eff-b0(6.7s), seresnext50(6.8s), eff-b3(9.5s), eff-b5(15.5s)
diff --git "a/4.28 \354\213\244\355\227\230 \353\252\251\353\241\235.md" "b/4.28 \354\213\244\355\227\230 \353\252\251\353\241\235.md"
new file mode 100644
index 0000000..adb9b75
--- /dev/null
+++ "b/4.28 \354\213\244\355\227\230 \353\252\251\353\241\235.md"
@@ -0,0 +1,31 @@
+## Overfitting?
+|Model|Batch|Seed|time/step|epoch|loss|val_loss|val_mIoU|LB score|
+|------|---|---|---|---|---|---|---|---|
+|DeepLabV3+, resnext50|8|77|2.4s|9|0.098|0.294|0.451|0.5795|
+|"|"|"|"|12|0.065|0.305|0.451|0.5696|
+
+With CrossEntropyLoss, overfitting generally seems to set in once train loss drops below 0.1.
+The submissions above were made to confirm this.
+
+## Resize(256)
+|Model|input_size|Batch|Seed|time/step|epoch|loss|val_loss|val_mIoU|LB score|
+|------|---|---|---|---|---|---|---|---|---|
+|DLV3+, resnext50|512|16|77|4.8s|10|0.108|0.283|0.463|0.5754|
+|"|256|16|77|0.8s|14|0.097|0.339|0.416|0.5609|
+|"|512|8|77|2.4s|14|0.098|0.294|0.451|0.5795|
+|"|256|8|77|0.45s|12|0.103|0.343|0.434|0.5640|
+
+Both train and test were simply resized to 256, with no other preprocessing.
+Runtime drops sharply (one epoch: 13 min → 2 min), which should make it easy to run a variety of experiments.
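+
+A minimal sketch of the resize-only transforms described above (albumentations, matching the pipeline in train_wandb.py):
+
+```python
+import albumentations as A
+from albumentations.pytorch import ToTensorV2
+
+# Resize-only preprocessing: 512x512 -> 256x256 for both train and test
+train_transform = A.Compose([
+    A.Resize(256, 256),
+    ToTensorV2()
+])
+test_transform = A.Compose([
+    A.Resize(256, 256),
+    ToTensorV2()
+])
+```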
+
+## (1-mIoU)+CE
+|Model|loss fn|Batch|Seed|time/step|epoch|loss|val_loss|val_mIoU|LB score|
+|------|---|---|---|---|---|---|---|---|---|
+|DLV3+, resnext50|CE|8|77|0.45s|12|0.103|0.343|0.434|0.5640|
+|"|(1-mIoU)*0.4|"|"|"|11|0.204|0.428|0.431|0.5653|
+|"|(1-mIoU)*0.7|"|"|"|14||0.510|0.417|-|
+|"|(1-mIoU)*0.2|"|"|"|13||0.394|0.431|-|
+
+
+## Other loss functions
+Focal, (1-mIoU)+Focal, and weighted CE all failed to train.
diff --git a/README.md b/README.md
index 05338ea..e1e663b 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,18 @@
-# Team.분리수거잘하조
+# 안현진
-***
-*made by* [김동우](zxcvbnm1997@hanmail.net), [김익재](ijjustin.kim@gmail.com), [송민기](thdalsrl10@gmail.com), [안현진](dkdlel0227@naver.com), [최재하](k1smet1403@gmail.com), [황정훈](https://github.com/wjdgns7712)
+### train_wandb.py
+Entry script for wandb AutoML: registers a Bayesian sweep and launches the training agent.
-blah blah~
+### dataloader.py
+Contains only the CustomDataLoader class (COCO-format Dataset).
+
+### scheduler.py
+Custom scheduler: CosineAnnealingWarmUpRestarts()
+Source: https://gaussian37.github.io/dl-pytorch-lr_scheduler/
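+
+Minimal usage sketch, mirroring train_wandb.py (the Linear model is a stand-in):
+
+```python
+import torch
+import torch.nn as nn
+from scheduler import CosineAnnealingWarmUpRestarts
+
+model = nn.Linear(4, 2)  # stand-in for the segmentation model
+optimizer = torch.optim.Adam(model.parameters(), lr=2e-6)  # base LR = warmup start
+scheduler = CosineAnnealingWarmUpRestarts(optimizer, T_0=20, eta_max=1e-4, T_up=2, gamma=0.5)
+
+for epoch in range(20):
+    # ... train one epoch ...
+    scheduler.step()  # stepped once per epoch, as in train_wandb.py
+```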
+
+### evaluate.py
+Metric (mIoU) and validation functions.
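+
+Standalone usage sketch with toy arrays (assumes 12 classes):
+
+```python
+import numpy as np
+from evaluate import add_hist, mIoU_score
+
+gt = np.random.randint(0, 12, size=(2, 64, 64))    # toy ground-truth masks
+pred = np.random.randint(0, 12, size=(2, 64, 64))  # toy predictions
+hist = add_hist(np.zeros((12, 12)), gt, pred, n_class=12)
+print(f"mIoU: {mIoU_score(hist):.3f}")
+```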
+
+### utils.py
+Helper functions: save_model, load_model, submit, calculate_parameter.
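+
+Example — counting the parameters of the DeepLabV3+ model used in the experiments:
+
+```python
+import segmentation_models_pytorch as smp
+from utils import calculate_parameter
+
+model = smp.DeepLabV3Plus(encoder_name='resnext50_32x4d',
+                          encoder_weights='imagenet', classes=12)
+calculate_parameter(model)  # prints total trainable parameters and conv-layer count
+```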
diff --git a/dataloader.py b/dataloader.py
new file mode 100644
index 0000000..b5b6ef2
--- /dev/null
+++ b/dataloader.py
@@ -0,0 +1,70 @@
+import os
+import torch
+import torch.nn as nn
+from torch.utils.data import Dataset, DataLoader
+import numpy as np
+import pandas as pd
+
+# preprocessing libraries
+import cv2
+from pycocotools.coco import COCO
+import torchvision
+import torchvision.transforms as transforms
+import albumentations as A
+from albumentations.pytorch import ToTensorV2
+
+
+def get_classname(classID, cats):
+ for i in range(len(cats)):
+ if cats[i]['id']==classID:
+ return cats[i]['name']
+ return "None"
+
+class CustomDataLoader(Dataset):
+ """COCO format"""
+ def __init__(self, data_dir, mode = 'train', transform = None):
+ super().__init__()
+ self.mode = mode
+ self.transform = transform
+ self.coco = COCO(data_dir)
+ self.dataset_path = 'input/data/'
+        self.category_names = ['Background', 'UNKNOWN', 'General trash', 'Paper', 'Paper pack', 'Metal', 'Glass', 'Plastic', 'Styrofoam', 'Plastic bag', 'Battery', 'Clothing']
+
+ def __getitem__(self, index: int):
+ image_id = self.coco.getImgIds(imgIds=index)
+ image_infos = self.coco.loadImgs(image_id)[0]
+
+ images = cv2.imread(self.dataset_path+image_infos['file_name'])
+ images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB).astype(np.float32)
+ images /= 255.0
+
+ if (self.mode in ('train', 'val')):
+ ann_ids = self.coco.getAnnIds(imgIds=image_infos['id'])
+ anns = self.coco.loadAnns(ann_ids)
+ cat_ids = self.coco.getCatIds()
+ cats = self.coco.loadCats(cat_ids)
+
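+            # Build a (H, W) class-index mask from the COCO annotations;
+            # np.maximum keeps the higher class index where annotations overlap.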
+ masks = np.zeros((image_infos["height"], image_infos["width"]))
+ for i in range(len(anns)):
+ className = get_classname(anns[i]['category_id'], cats)
+ pixel_value = self.category_names.index(className)
+ masks = np.maximum(self.coco.annToMask(anns[i])*pixel_value, masks)
+ masks = masks.astype(np.float32)
+
+ if self.transform is not None:
+ transformed = self.transform(image=images, mask=masks)
+ images = transformed["image"]
+ masks = transformed["mask"]
+
+ return images, masks #, image_infos
+
+ if self.mode == 'test':
+ if self.transform is not None:
+ transformed = self.transform(image=images)
+ images = transformed["image"]
+
+ return images #, image_infos
+
+
+ def __len__(self):
+ return len(self.coco.getImgIds())
\ No newline at end of file
diff --git a/evaluate.py b/evaluate.py
new file mode 100644
index 0000000..5b0dc40
--- /dev/null
+++ b/evaluate.py
@@ -0,0 +1,83 @@
+import torch
+import numpy as np
+from utils import *
+
+
+def validation(model, data_loader, criterion, device):
+ model.eval()
+ with torch.no_grad():
+ total_loss = 0
+ cnt = 0
+ mIoU_list = []
+ for step, (images, masks) in enumerate(data_loader):
+ images, masks = images.to(device), masks.long().to(device)
+
+ outputs = model(images)
+ loss = criterion(outputs, masks)
+ total_loss += loss
+ cnt += 1
+
+            # argmax over the class dimension; no squeeze(), which would drop the
+            # batch dimension (and break) when the last batch holds a single image
+            outputs = torch.argmax(outputs, dim=1).detach().cpu().numpy()
+
+ mIoU = label_accuracy_score(masks.detach().cpu().numpy(), outputs, n_class=12)
+ mIoU_list.append(mIoU)
+
+ avrg_loss = total_loss / cnt
+ model.train()
+ return avrg_loss, np.mean(mIoU_list)
+
+
+def validation2(model, data_loader, criterion, device, n_class=12):
+ model.eval()
+ with torch.no_grad():
+ total_loss = 0
+ cnt = 0
+ hist = np.zeros((n_class, n_class))
+ for step, (images, masks) in enumerate(data_loader):
+ images, masks = images.to(device), masks.long().to(device)
+
+ outputs = model(images)
+ loss = criterion(outputs, masks)
+ total_loss += loss
+ cnt += 1
+
+            # argmax over the class dimension (see note in validation above)
+            outputs = torch.argmax(outputs, dim=1).detach().cpu().numpy()
+
+ hist = add_hist(hist, masks.detach().cpu().numpy(), outputs, n_class=n_class)
+
+ avrg_loss = total_loss / cnt
+ mIoU = mIoU_score(hist)
+ model.train()
+ return avrg_loss, mIoU
+
+
+def mIoU_score(hist):
+ with np.errstate(divide='ignore', invalid='ignore'):
+ iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))
+ mean_iu = np.nanmean(iu)
+ return mean_iu
+
+
+def add_hist(hist, label_trues, label_preds, n_class):
+ for lt, lp in zip(label_trues, label_preds):
+ hist += _fast_hist(lt.flatten(), lp.flatten(), n_class)
+ return hist
+
+def _fast_hist(label_true, label_pred, n_class):
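+    # confusion-matrix trick: encode each (true, pred) pair as the single index
+    # true * n_class + pred, then bincount and reshape into an n_class x n_class histogram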
+ mask = (label_true >= 0) & (label_true < n_class)
+ hist = np.bincount(
+ n_class * label_true[mask].astype(int) +
+ label_pred[mask], minlength=n_class ** 2).reshape(n_class, n_class)
+ return hist
+
+
+def label_accuracy_score(label_trues, label_preds, n_class=12):
+ hist = np.zeros((n_class, n_class))
+ for lt, lp in zip(label_trues, label_preds):
+ hist += _fast_hist(lt.flatten(), lp.flatten(), n_class)
+ with np.errstate(divide='ignore', invalid='ignore'):
+ iu = np.diag(hist) / (
+ hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist)
+ )
+ mean_iu = np.nanmean(iu)
+ return mean_iu
diff --git a/scheduler.py b/scheduler.py
new file mode 100644
index 0000000..513a376
--- /dev/null
+++ b/scheduler.py
@@ -0,0 +1,58 @@
+import math
+from torch.optim.lr_scheduler import _LRScheduler
+
+class CosineAnnealingWarmUpRestarts(_LRScheduler):
+ def __init__(self, optimizer, T_0, T_mult=1, eta_max=0.1, T_up=0, gamma=1., last_epoch=-1):
+ if T_0 <= 0 or not isinstance(T_0, int):
+ raise ValueError("Expected positive integer T_0, but got {}".format(T_0))
+ if T_mult < 1 or not isinstance(T_mult, int):
+ raise ValueError("Expected integer T_mult >= 1, but got {}".format(T_mult))
+ if T_up < 0 or not isinstance(T_up, int):
+ raise ValueError("Expected positive integer T_up, but got {}".format(T_up))
+ self.T_0 = T_0
+ self.T_mult = T_mult
+ self.base_eta_max = eta_max
+ self.eta_max = eta_max
+ self.T_up = T_up
+ self.T_i = T_0
+ self.gamma = gamma
+ self.cycle = 0
+ self.T_cur = last_epoch
+ super(CosineAnnealingWarmUpRestarts, self).__init__(optimizer, last_epoch)
+
+
+ def get_lr(self):
+ if self.T_cur == -1:
+ return self.base_lrs
+ elif self.T_cur < self.T_up:
+ return [(self.eta_max - base_lr)*self.T_cur / self.T_up + base_lr for base_lr in self.base_lrs]
+ else:
+ return [base_lr + (self.eta_max - base_lr) * (1 + math.cos(math.pi * (self.T_cur-self.T_up) / (self.T_i - self.T_up))) / 2
+ for base_lr in self.base_lrs]
+
+ def step(self, epoch=None):
+ if epoch is None:
+ epoch = self.last_epoch + 1
+ self.T_cur = self.T_cur + 1
+ if self.T_cur >= self.T_i:
+ self.cycle += 1
+ self.T_cur = self.T_cur - self.T_i
+ self.T_i = (self.T_i - self.T_up) * self.T_mult + self.T_up
+ else:
+ if epoch >= self.T_0:
+ if self.T_mult == 1:
+ self.T_cur = epoch % self.T_0
+ self.cycle = epoch // self.T_0
+ else:
+ n = int(math.log((epoch / self.T_0 * (self.T_mult - 1) + 1), self.T_mult))
+ self.cycle = n
+ self.T_cur = epoch - self.T_0 * (self.T_mult ** n - 1) / (self.T_mult - 1)
+ self.T_i = self.T_0 * self.T_mult ** (n)
+ else:
+ self.T_i = self.T_0
+ self.T_cur = epoch
+
+ self.eta_max = self.base_eta_max * (self.gamma**self.cycle)
+ self.last_epoch = math.floor(epoch)
+ for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
+ param_group['lr'] = lr
\ No newline at end of file
diff --git a/train_wandb.py b/train_wandb.py
new file mode 100644
index 0000000..95c1975
--- /dev/null
+++ b/train_wandb.py
@@ -0,0 +1,168 @@
+import os
+import random
+import time
+import json
+import wandb
+import warnings
+warnings.filterwarnings('ignore')
+
+import numpy as np
+import pandas as pd
+
+import torch
+import torch.nn as nn
+from torch.utils.data import Dataset, DataLoader
+import segmentation_models_pytorch as smp
+
+from pycocotools.coco import COCO
+import cv2
+import torchvision
+import torchvision.transforms as transforms
+import albumentations as A
+from albumentations.pytorch import ToTensorV2
+
+import matplotlib.pyplot as plt
+from natsort import natsorted
+from torch.cuda.amp import GradScaler, autocast
+
+from utils import *
+from dataloader import *
+#from loss import *
+from scheduler import *
+from evaluate import *
+
+
+def train(config=None):
+ wandb.init(config=config)
+ config = wandb.config
+
+ ### Hyper parameters ###
+ SEED = config.SEED
+ BATCH_SIZE = config.BATCH_SIZE
+ EPOCHS = config.EPOCHS
+ LR = config.LR
+ save_model_name = f'{config.project_name}_seed{SEED}_batch{BATCH_SIZE}'
+ accumulation_step = 1
+ best_val_mIoU = 0.30
+
+
+
+ ### SEED setting ###
+ torch.manual_seed(SEED)
+ torch.cuda.manual_seed(SEED)
+ torch.backends.cudnn.deterministic = True
+ torch.backends.cudnn.benchmark = False
+ np.random.seed(SEED)
+ random.seed(SEED)
+
+
+ ### Dataset ###
+ dataset_path = 'input/data'
+ train_path = dataset_path + '/train.json'
+ val_path = dataset_path + '/val.json'
+ test_path = dataset_path + '/test.json'
+
+ train_transform = A.Compose([
+ ToTensorV2()
+ ])
+
+ test_transform = A.Compose([
+ ToTensorV2()
+ ])
+
+ train_dataset = CustomDataLoader(data_dir=train_path, mode='train', transform=train_transform)
+ val_dataset = CustomDataLoader(data_dir=val_path, mode='val', transform=test_transform)
+ test_dataset = CustomDataLoader(data_dir=test_path, mode='test', transform=test_transform)
+
+ train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
+ val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
+ test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
+
+
+    ### Model ###
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    model = smp.DeepLabV3Plus(
+        encoder_name='resnext50_32x4d',
+        encoder_weights='imagenet',
+        classes=12
+    ).to(device)
+ wandb.watch(model)
+
+
+ ### Train ###
+ criterion = nn.CrossEntropyLoss()
+ optimizer = torch.optim.Adam(model.parameters(), lr=LR[1])
+ scheduler = CosineAnnealingWarmUpRestarts(optimizer, T_0=EPOCHS, eta_max=LR[0], T_up=2, gamma=0.5)
+
+ scaler = GradScaler()
+ print("Start training..")
+    for epoch in range(1, EPOCHS + 1):
+ avg_loss = 0
+ batch_count = len(train_loader)
+
+ for step, (images, masks) in enumerate(train_loader):
+ start = time.time()
+ images, masks = images.to(device), masks.long().to(device)
+
+ with autocast():
+ output = model(images)
+ loss = criterion(output, masks)
+ scaler.scale(loss).backward()
+
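+            # gradient accumulation: update weights every accumulation_step batches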
+ if (step+1)%accumulation_step==0:
+ scaler.step(optimizer)
+ scaler.update()
+ optimizer.zero_grad()
+
+ avg_loss += loss.item() / batch_count
+ print(f"\rEpoch:{epoch:3d} step:{step:3d}/{batch_count-1} time:{time.time() - start:.3f} LR:{LR:.6f}", end='')
+
+ scheduler.step()
+ val_loss, val_mIoU = validation(model, val_loader, criterion, device)
+ print(f" loss: {avg_loss:.3f} val_loss: {val_loss:.3f} val_mIoU:{val_mIoU:.3f}")
+ wandb.log({"loss": avg_loss, "val_loss": val_loss, "val_mIoU": val_mIoU})
+ if best_val_mIoU < val_mIoU:
+ save_model(model, saved_dir="model", file_name=save_model_name + f'_epoch{epoch}_score{val_mIoU:.3f}.pt')
+ best_val_mIoU = val_mIoU
+ print("Finish training")
+
+
+def main():
+ project_name = 'se_resnext50_32x4d'
+ count = 20
+
+ sweep_config = {
+ 'method': 'bayes'
+ }
+ metric = {
+ 'name': 'val_mIoU',
+ 'goal': 'maximize'
+ }
+ sweep_config['metric'] = metric
+
+    parameters_dict = {
+        'SEED': {
+            'distribution': 'int_uniform',  # seeds must be integers for torch.manual_seed
+            'max': 9999,
+            'min': 1
+        },
+        'BATCH_SIZE': {
+            'values': [4, 8, 16]
+        },
+        'LR': {
+            'value': (1e-4, 2e-6)  # (eta_max for the scheduler, initial LR)
+        },
+        'scheduler': {
+            'value': None
+        },
+        'project_name': {
+            'value': project_name
+        },
+    }
+ sweep_config['parameters'] = parameters_dict
+
+ sweep_id = wandb.sweep(sweep_config, project=project_name)
+ wandb.agent(sweep_id, train, count=count)
+
+if __name__ == '__main__':
+ main()
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..c8ff31e
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,64 @@
+# https://github.com/wkentaro/pytorch-fcn/blob/master/torchfcn/utils.py
+import os
+import glob
+import torch
+import numpy as np
+
+
+def save_model(model, saved_dir="model", file_name="default.pt"):
+ #os.makedirs(saved_dir, exist_ok=True)
+ check_point = {'model' : model.state_dict()}
+ path = os.path.join(saved_dir, file_name)
+ torch.save(check_point, path)
+
+def load_model(model, device, saved_dir="model", file_name="default.pt"):
+ path = os.path.join(saved_dir, file_name)
+ checkpoint = torch.load(path, map_location=device)
+ model.load_state_dict(state_dict=checkpoint['model'])
+ print("load success")
+
+
+def calculate_parameter(model, print_param=False):
+ n_param = 0
+ n_conv = 0
+ for p_idx,(param_name,param) in enumerate(model.named_parameters()):
+ if param.requires_grad:
+ param_numpy = param.detach().cpu().numpy() # to numpy array
+ n_param += len(param_numpy.reshape(-1))
+            if print_param:
+ print ("[%d] name:[%s] shape:[%s]."%(p_idx,param_name,param_numpy.shape))
+ if "conv" in param_name: n_conv+=1
+ print("-"*50+f"\nTotal number of parameters: [{n_param:,d}]\n"+"-"*50)
+ print(f"Total number of Conv layer : {n_conv}")
+
+
+import json
+import requests
+import os
+from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse
+
+def submit(file_path = '', desc=""):
+ url = urlparse('http://ec2-13-124-161-225.ap-northeast-2.compute.amazonaws.com:8000/api/v1/competition/28/presigned_url/?description=&hyperparameters={%22training%22:{},%22inference%22:{}}')
+ qs = dict(parse_qsl(url.query))
+ qs['description'] = desc
+ parts = url._replace(query=urlencode(qs))
+ url = urlunparse(parts)
+
+ print(url)
+ headers = {
+ 'Authorization': 'Bearer 0f527e16e65386933b5320164e9f30523c13251c' #user_key
+ }
+ res = requests.get(url, headers=headers)
+ print(res.text)
+ data = json.loads(res.text)
+
+ submit_url = data['url']
+ body = {
+ 'key':'app/Competitions/000028/Users/{}/Submissions/{}/output.csv'.format(str(data['submission']['user']).zfill(8),str(data['submission']['local_id']).zfill(4)),
+ 'x-amz-algorithm':data['fields']['x-amz-algorithm'],
+ 'x-amz-credential':data['fields']['x-amz-credential'],
+ 'x-amz-date':data['fields']['x-amz-date'],
+ 'policy':data['fields']['policy'],
+ 'x-amz-signature':data['fields']['x-amz-signature']
+ }
+ requests.post(url=submit_url, data=body, files={'file': open(file_path, 'rb')})