forked from fishial/fish-identification
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.py
145 lines (111 loc) · 6.08 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import sys
#Change path specificly to your directories
sys.path.insert(1, '/home/codahead/Fishial/FishialReaserch')
import os
from utils import run_eval_checkpoints
from utils import get_current_date_in_format
from utils import get_dataset_dicts
from utils import remove_tmp_files
from utils import save_json
from module.segmentation_package.src.trainer import Trainer
import torch
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.data import MetadataCatalog
from detectron2.data import DatasetCatalog
from detectron2.utils.logger import setup_logger
from detectron2.engine import DefaultTrainer
setup_logger()
def main():
train_folder = "Train"
for d in ["Train", "Test"]:
DatasetCatalog.register("fishial_" + d, lambda d=d: get_dataset_dicts("fishial_collection", d, json_file='fishial_collection_correct.json'))
MetadataCatalog.get("fishial_" + d).set(thing_classes=["fish"], evaluator_type="coco")
config_path = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
experiment_name = "amp_on"
main_folder = "output"
learning_rates = [0.00025] # .01 .001 .0001 .00025 .000025
roi_batch_sizes = [128] # 32, 64, 128, 256
freezing_layers = [2] # 0 1 2 3 4 5
amp = [True]
crop_enables = [True]
max_iters = 100000
check_point_step = 2500
experiment_num = 0
# Pick and set up a GPU. This serve got 8 [0, ... , 7]
# torch.cuda.set_device(7)
# Tune hyperparams with val set.
for freezing_layer in freezing_layers:
for learning_rate in learning_rates:
for crop_enable in crop_enables:
for amp_state in amp:
for roi_batch_size in roi_batch_sizes:
torch.cuda.empty_cache()
experiment_folder = os.path.join(main_folder,
os.path.join(experiment_name, get_current_date_in_format()))
# Create experiment folder
os.makedirs(experiment_folder, exist_ok=True)
experiment_settings = {
"roi_batch_size": roi_batch_size,
"amp_state": amp_state,
"crop_enable": crop_enable,
"learning_rate": learning_rate,
"freezing_layer": freezing_layer
}
# Save config of experiment to experiment folder.
save_json(experiment_settings, os.path.join(experiment_folder, "ckpt_eval.json"))
# Get default config node.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(config_path))
cfg.DATASETS.TRAIN = ("fishial_" + train_folder, )
# If it is set, after the trainer() will start automatically trainer.test()
cfg.DATASETS.TEST = ()
# Number of data loading threads, dft=4
cfg.DATALOADER.NUM_WORKERS = 2
# Path to a checkpoint file to be loaded to the model.
# You can find available models in the model zoo.
# Here I'm loading the weights obtained through the training on the COCO dataset
cfg.MODEL.WEIGHTS = "best_scores/model_0067499_amp_on.pth"
# Number of images per batch across all machines.
cfg.SOLVER.IMS_PER_BATCH = 2
# cfg.SEED = 3
cfg.SOLVER.MAX_ITER = max_iters
# Save checkpoits each specific step
cfg.SOLVER.CHECKPOINT_PERIOD = check_point_step
#automatic mix precision
cfg.SOLVER.AMP.ENABLED = amp_state
# `True` if cropping is used for data augmentation during training
cfg.INPUT.CROP.ENABLED = crop_enable
cfg.TEST.AUG.ENABLED = crop_enable
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1 # only has one class (fish)
# Not filter out images with no annotations. keep them
cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True
# cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0, 3.0, 4.0, 5.0]]
# cfg.MODEL.ANCHOR_GENERATOR.OFFSET = 0.5
# cfg.MODEL.PIXEL_MEAN = [44.2694227416269, 44.2694227416269, 44.2694227416269]
# cfg.MODEL.PIXEL_STD = [57.375, 57.375, 57.375]
cfg.OUTPUT_DIR = experiment_folder
cfg.MODEL.BACKBONE.FREEZE_AT = freezing_layer
cfg.SOLVER.BASE_LR = learning_rate
# RoI minibatch size *per image* (number of regions of interest [ROIs])
# Total number of RoIs per training minibatch =
# ROI_HEADS.BATCH_SIZE_PER_IMAGE * SOLVER.IMS_PER_BATCH
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = roi_batch_size # (default: 512)
# Build a trainer from the specified config
trainer = DefaultTrainer(cfg) #Trainer
# If resume==True, and last checkpoint exists, resume from it.
# Otherwise, load a model specified by the config.
trainer.resume_or_load()
# Start training
trainer.train()
torch.cuda.empty_cache()
#Remove tmp files
remove_tmp_files(experiment_folder)
run_eval_checkpoints(cfg, experiment_folder, "fishial_Test")
experiment_num += 1
print("Currently was finished: |{}| ".format(experiment_num))
print(70 * "*")
print("Finish")
print(70 * "*")
if __name__ == "__main__":
main()