Skip to content

Commit

Permalink
Merge pull request #75 from kookmin-sw/swjeong
Browse files Browse the repository at this point in the history
ray 를 위한 nodepool 추가 및 ResNet34 예시 코드 작성
  • Loading branch information
mh3ong authored May 16, 2024
2 parents 28eda8e + 78c9ce1 commit 18a459e
Show file tree
Hide file tree
Showing 8 changed files with 768 additions and 1 deletion.
8 changes: 8 additions & 0 deletions automation/karpenter_node_pool_deploy/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,14 @@ def handler(event, context):
capacity_type = 'on-demand'
result = apply_nodepool_yaml(eks_cluster_name, region, profiler_nodepool_name, profiler_nodeclass_name, profiler_nodepool_family_list, capacity_type)

ray_nodepool_name = 'ray-ondemand-pool'
ray_nodepool_family_list = [
't3.medium', 't3.large', 'm5.large',
]
ray_nodeclass_name = 'ec2-cpu'
capacity_type = 'on-demand'
result = apply_nodepool_yaml(eks_cluster_name, region, ray_nodepool_name, ray_nodeclass_name, ray_nodepool_family_list, capacity_type)

return {
'statusCode': 200,
'body': "complete update nodepool"
Expand Down
2 changes: 2 additions & 0 deletions example/ResNet34_CIFAR-10/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
cifar-10*
torch.pt
18 changes: 18 additions & 0 deletions example/ResNet34_CIFAR-10/download_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Download the CIFAR-10 train/test splits for the ResNet34 example.

Run from the repository root; the data lands in example/ResNet34_CIFAR-10/.
"""
import torchvision
import torchvision.transforms as transforms
import os


# Tensor conversion plus per-channel normalization to [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

cwd = os.getcwd()

# Download the training split.
trainset = torchvision.datasets.CIFAR10(
    root=f'{cwd}/example/ResNet34_CIFAR-10/',
    train=True,
    download=True,
    transform=transform,
)

# Download the test split.
testset = torchvision.datasets.CIFAR10(
    root=f'{cwd}/example/ResNet34_CIFAR-10/',
    train=False,
    download=True,
    transform=transform,
)
60 changes: 60 additions & 0 deletions example/ResNet34_CIFAR-10/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from torch import nn

class BasicBlock(nn.Module):
    """Two-conv residual block used by ResNet-18/34.

    Args:
        in_planes: number of input channels.
        planes: number of output channels of each 3x3 conv.
        stride: stride of the first conv; >1 downsamples spatially.
    """

    # Output-channel multiplier; basic blocks do not expand channels.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Identity shortcut unless the spatial size or channel count changes;
        # then a 1x1 conv + BN projects the input to match the main path.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        """Return relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))."""
        # nn.functional.relu avoids allocating a fresh nn.ReLU module
        # on every forward pass (the original did `nn.ReLU()(...)`).
        out = nn.functional.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = out + self.shortcut(x)  # residual connection
        return nn.functional.relu(out)

class ResNet(nn.Module):
    """ResNet backbone with a CIFAR-style 3x3 stem (32x32 RGB inputs).

    Args:
        block: residual block class exposing an `expansion` attribute and
            a `(in_planes, planes, stride)` constructor.
        num_blocks: list of four ints — blocks per stage.
        num_classes: size of the final linear classifier (default 10).
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        # Four stages; channels double and resolution halves from stage 2 on.
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack `num_blocks` blocks; only the first block may downsample."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for s in strides:
            layers.append(block(self.in_planes, planes, s))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        # Functional relu/pool avoid constructing throwaway nn.Module
        # instances on every forward pass (original used nn.ReLU()(...)
        # and nn.AdaptiveAvgPool2d(...)(...)).
        out = nn.functional.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = nn.functional.adaptive_avg_pool2d(out, (1, 1))
        out = out.view(out.size(0), -1)  # flatten to (batch, 512*expansion)
        return self.linear(out)

def ModelClass():
    """Factory returning a ResNet-34 (BasicBlock stages of [3, 4, 6, 3])."""
    stage_layout = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_layout)
Empty file.
68 changes: 68 additions & 0 deletions example/ResNet34_CIFAR-10/sskai_load_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Preprocessing: load the raw CIFAR-10 python batches from disk into tensors.
def sskai_load_data():
    """Load CIFAR-10 from ./cifar-10/ and return per-sample tensors.

    Expects the five `data_batch_{1..5}` files and `test_batch` (the
    "python version" pickle format) under ./cifar-10/ relative to the
    current working directory.

    Returns:
        x: list of float tensors of shape (3, 32, 32), scaled to [0, 1],
           training samples first, then test samples.
        y: list of scalar long tensors (class indices), same order as x.
    """
    import pickle
    import numpy as np
    import torch

    def load_cifar_batch(filename):
        # Each batch file is a latin1-encoded pickle holding 'data'
        # ((N, 3072) uint8 rows) and 'labels' (N ints).
        with open(filename, 'rb') as file:
            batch = pickle.load(file, encoding='latin1')
        # Rearrange rows into HWC images: (N, 32, 32, 3).
        features = batch['data'].reshape((len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)
        return features, batch['labels']

    # Fixed: original built "./cifar-10//data_batch_1" (double slash).
    data_dir_path = "./cifar-10"
    data_paths = [f'{data_dir_path}/data_batch_{i}' for i in range(1, 6)]
    test_path = f'{data_dir_path}/test_batch'

    # Load and combine the five training batches.
    train_features = []
    train_labels = []
    for path in data_paths:
        features, labels = load_cifar_batch(path)
        train_features.append(features)
        train_labels.append(labels)

    test_features, test_labels = load_cifar_batch(test_path)

    # HWC uint8 -> CHW float in [0, 1]; labels -> long tensor.
    # Train first, then test — same ordering the original produced by
    # concatenating its train and test TensorDatasets.
    features = np.concatenate(train_features + [test_features])
    labels = np.concatenate(train_labels + [test_labels])
    features = torch.tensor(features).permute(0, 3, 1, 2).float() / 255.0
    labels = torch.tensor(labels).long()

    # Split into per-sample lists. (The original built several DataLoaders
    # and a ConcatDataset it never used, then iterated item-by-item to
    # produce exactly these lists — that dead work is removed.)
    x = list(features)
    y = list(labels)
    return x, y


if __name__ == '__main__':
    sskai_load_data()
611 changes: 611 additions & 0 deletions example/ResNet34_CIFAR-10/train.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion recommend/family_recommend/family/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def get_family_for_inference(region):
df['TotalScore'] = df.apply(get_total_instance_score, args=(max_price, max_benchmark), axis=1)

groups = [
['g3', 'p2'], # group 1
['g3'], # group 1 (호환성 문제로 p2 제거)
['g4dn', 'g5g', 'p3'], # group 2
['g6', 'gr6', 'g5'], # group 3
['p3dn', 'p4d'], # group 4
Expand Down

0 comments on commit 18a459e

Please sign in to comment.