Skip to content

Commit

Permalink
fix cifar_c and configs
Browse files Browse the repository at this point in the history
  • Loading branch information
Lupin1998 committed Nov 23, 2023
1 parent ad179bc commit a710312
Show file tree
Hide file tree
Showing 63 changed files with 263 additions and 62 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Dataset settings: CIFAR_C data source rooted at data/cifar100_c/.
data_source_cfg = dict(type='CIFAR_C', root='data/cifar100_c/')

dataset_type = 'ClassificationDataset'
# Per-channel mean / std handed to the `Normalize` transform below.
img_norm_cfg = dict(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.201])
train_pipeline = [
    dict(type='RandomResizedCrop', size=224, scale=[0.8, 1], interpolation=3),  # bicubic
    dict(type='RandomHorizontalFlip'),
]
test_pipeline = [
    dict(type='Resize', size=224, interpolation=3),
]
# prefetch
prefetch = False
if not prefetch:
    # Both pipelines only get ToTensor + Normalize appended when prefetch is
    # disabled, so the two `extend` calls must live inside this branch.
    # NOTE(review): presumably the prefetcher does the conversion itself when
    # prefetch=True -- confirm against the dataset implementation.
    train_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)])
    test_pipeline.extend([dict(type='ToTensor'), dict(type='Normalize', **img_norm_cfg)])

data = dict(
    imgs_per_gpu=100,  # 100 x 1gpu = 100
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_source=dict(split='train', **data_source_cfg),
        pipeline=train_pipeline,
        prefetch=prefetch,
    ),
    # The validation split never uses prefetch, regardless of the flag above.
    val=dict(
        type=dataset_type,
        data_source=dict(split='test', **data_source_cfg),
        pipeline=test_pipeline,
        prefetch=False,
    ),
)

# validation hook: top-1 / top-5 evaluation settings
evaluation = dict(
    initial=False,
    interval=1,
    imgs_per_gpu=100,
    workers_per_gpu=4,
    eval_param=dict(topk=(1, 5)),
    save_best='auto',
)

# checkpoint saving settings
checkpoint_config = dict(
    interval=10,
    max_keep_ckpts=1,
)
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
end_momentum=0.99999,
adjust_scope=[0.1, 1.0],
warming_up="constant",
interval=1)
update_interval=1)
]
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
end_momentum=0.99999,
adjust_scope=[0.1, 1.0],
warming_up="constant",
interval=1)
update_interval=1)
]
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
end_momentum=0.99996,
adjust_scope=[0.1, 1.0],
warming_up="constant",
interval=1)
update_interval=1)
]
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
end_momentum=0.99996,
adjust_scope=[0.1, 1.0],
warming_up="constant",
interval=1)
update_interval=1)
]
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
end_momentum=0.999999,
adjust_scope=[0.1, 1.0],
warming_up="constant",
interval=1),
update_interval=1),
dict(type='SAVEHook',
iter_per_epoch=500,
save_interval=12500, # plot every 500 x 25 ep
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
end_momentum=0.999999,
adjust_scope=[0.1, 1.0],
warming_up="constant",
interval=1),
update_interval=1),
dict(type='SAVEHook',
iter_per_epoch=500,
save_interval=12500, # plot every 500 x 25 ep
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
end_momentum=0.999999,
adjust_scope=[0.1, 1.0],
warming_up="constant",
interval=1),
update_interval=1),
dict(type='SAVEHook',
iter_per_epoch=500,
save_interval=12500, # plot every 500 x 25 ep
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
end_momentum=0.999999,
adjust_scope=[0.1, 1.0],
warming_up="constant",
interval=1),
update_interval=1),
dict(type='SAVEHook',
iter_per_epoch=500,
save_interval=12500, # plot every 500 x 25 ep
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
adjust_scope=[0.25, 1.0],
warming_up="constant",
update_interval=update_interval,
interval=1)
)
]

# optimizer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
adjust_scope=[0.25, 1.0],
warming_up="constant",
update_interval=update_interval,
interval=1)
)
]

# optimizer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
adjust_scope=[0.25, 1.0],
warming_up="constant",
update_interval=update_interval,
interval=1)
)
]

# optimizer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
adjust_scope=[0.25, 1.0],
warming_up="constant",
update_interval=update_interval,
interval=1)
)
]

# optimizer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
adjust_scope=[0.25, 1.0],
warming_up="constant",
update_interval=update_interval,
interval=1)
)
]

# optimizer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
adjust_scope=[0.25, 1.0],
warming_up="constant",
update_interval=update_interval,
interval=1)
)
]

# optimizer
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Inherited base configs: corrupted CIFAR-100 dataset settings + default runtime.
_base_ = [
    '../../../_base_/datasets/cifar100/sz224_corruption_bs100.py',
    '../../../_base_/default_runtime.py',
]

# model settings
model = dict(
    type='MixUpClassification',
    pretrained=None,
    # one alpha per entry of `mix_mode`
    alpha=[1, 0.8],
    mix_mode=['cutmix', 'mixup'],
    # per-method arguments, keyed by the mixup method name
    mix_args=dict(
        alignmix=dict(eps=0.1, max_iter=100),
        # AttentiveMix+ in this repo (use pre-trained)
        attentivemix=dict(grid_size=32, top_k=None, beta=8),
        # require pre-trained mixblock
        automix=dict(mask_adjust=0, lam_margin=0),
        fmix=dict(
            decay_power=3,
            size=(224, 224),
            max_soft=0.0,
            reformulate=False,
        ),
        gridmix=dict(
            n_holes=(2, 6),
            hole_aspect_ratio=1.0,
            cut_area_ratio=(0.5, 1),
            cut_aspect_ratio=(0.5, 2),
        ),
        manifoldmix=dict(layer=(0, 3)),
        puzzlemix=dict(
            transport=True,
            t_batch_size=32,  # adjust t_batch_size if CUDA out of memory
            t_size=-1,
            mp=None,
            block_num=4,  # block_num<=4 and mp=2/4 for fast training
            beta=1.2,
            gamma=0.5,
            eta=0.2,
            neigh_size=4,
            n_labels=3,
            t_eps=0.8,
        ),
        resizemix=dict(
            scope=(0.1, 0.8),
            use_alpha=True,
            interpolate_mode="bilinear",
        ),
        # require pre-trained mixblock
        samix=dict(mask_adjust=0, lam_margin=0.08),
        transmix=dict(mix_mode="cutmix"),
    ),
    backbone=dict(
        type='VisionTransformer',
        arch='deit-small',
        img_size=224,
        patch_size=16,
        drop_path=0.1,
    ),
    head=dict(
        type='VisionTransformerClsHead',  # mixup CE + label smooth
        loss=dict(
            type='LabelSmoothLoss',
            label_smooth_val=0.1,
            num_classes=100,
            mode='original',
            loss_weight=1.0,
        ),
        in_channels=384,
        num_classes=100,
    ),
    init_cfg=[
        dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.0),
        dict(type='Constant', layer=['LayerNorm', 'BatchNorm'], val=1.0, bias=0.0),
    ],
)

# optimizer
optimizer = dict(
    type='AdamW',
    lr=1e-3,
    weight_decay=0.05, eps=1e-8, betas=(0.9, 0.999),
    # Keys are patterns, values are per-parameter overrides; every entry here
    # sets weight_decay to 0.
    # NOTE(review): presumably matched against parameter names by the
    # optimizer builder -- confirm there.
    paramwise_options={
        # Raw string: `\d` is an invalid escape in a normal string literal
        # (SyntaxWarning on recent Python); the pattern text is unchanged.
        r'(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.),
        'norm': dict(weight_decay=0.),
        'bias': dict(weight_decay=0.),
        'cls_token': dict(weight_decay=0.),
        'pos_embed': dict(weight_decay=0.),
    })

# gradient accumulation interval
update_interval = 1  # total: 1 x bs100 x 1 accumulates = bs100

# fp16 (mixed precision) settings; disabled via `use_fp16`
use_fp16 = False
fp16 = dict(
    type='mmcv',
    loss_scale='dynamic',
)
optimizer_config = dict(
    grad_clip=dict(max_norm=5.0),
    update_interval=update_interval,
)

# learning policy: cosine annealing with linear warmup
lr_config = dict(
    policy='CosineAnnealing',
    by_epoch=False,
    min_lr=1e-6,
    warmup='linear',
    warmup_iters=20,
    warmup_by_epoch=True,
    warmup_ratio=1e-5,
)

# runtime settings
runner = dict(
    type='EpochBasedRunner',
    max_epochs=200,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Inherited base configs: corrupted CIFAR-100 dataset settings + default runtime.
_base_ = [
    '../../../_base_/datasets/cifar100/sz224_corruption_bs100.py',
    '../../../_base_/default_runtime.py',
]

# model settings
model = dict(
    type='MixUpClassification',
    pretrained=None,
    # one alpha per entry of `mix_mode`
    alpha=[1, 0.8],
    mix_mode=['cutmix', 'mixup'],
    # per-method arguments, keyed by the mixup method name
    mix_args=dict(
        alignmix=dict(eps=0.1, max_iter=100),
        # AttentiveMix+ in this repo (use pre-trained)
        attentivemix=dict(grid_size=32, top_k=None, beta=8),
        # require pre-trained mixblock
        automix=dict(mask_adjust=0, lam_margin=0),
        fmix=dict(
            decay_power=3,
            size=(224, 224),
            max_soft=0.0,
            reformulate=False,
        ),
        gridmix=dict(
            n_holes=(2, 6),
            hole_aspect_ratio=1.0,
            cut_area_ratio=(0.5, 1),
            cut_aspect_ratio=(0.5, 2),
        ),
        manifoldmix=dict(layer=(0, 3)),
        puzzlemix=dict(
            transport=True,
            t_batch_size=32,  # adjust t_batch_size if CUDA out of memory
            t_size=-1,
            mp=None,
            block_num=4,  # block_num<=4 and mp=2/4 for fast training
            beta=1.2,
            gamma=0.5,
            eta=0.2,
            neigh_size=4,
            n_labels=3,
            t_eps=0.8,
        ),
        resizemix=dict(
            scope=(0.1, 0.8),
            use_alpha=True,
            interpolate_mode="bilinear",
        ),
        # require pre-trained mixblock
        samix=dict(mask_adjust=0, lam_margin=0.08),
        transmix=dict(mix_mode="cutmix"),
    ),
    backbone=dict(
        type='SwinTransformer',
        arch='tiny',
        img_size=224,
        drop_path_rate=0.2,
        out_indices=(3,),  # x-1: stage-x
    ),
    head=dict(
        type='ClsMixupHead',  # mixup CE + label smooth
        loss=dict(
            type='LabelSmoothLoss',
            label_smooth_val=0.1,
            num_classes=100,
            mode='original',
            loss_weight=1.0,
        ),
        with_avg_pool=True,
        in_channels=768,
        num_classes=100,
    ),
    init_cfg=[
        dict(type='TruncNormal', layer='Linear', std=0.02, bias=0.0),
        dict(type='Constant', layer=['LayerNorm', 'BatchNorm'], val=1.0, bias=0.0),
    ],
)

# optimizer
optimizer = dict(
    type='AdamW',
    lr=5e-4,
    weight_decay=0.05, eps=1e-8, betas=(0.9, 0.999),
    # Keys are patterns, values are per-parameter overrides; every entry here
    # sets weight_decay to 0.
    # NOTE(review): presumably matched against parameter names by the
    # optimizer builder -- confirm there.
    paramwise_options={
        # Raw string: `\d` is an invalid escape in a normal string literal
        # (SyntaxWarning on recent Python); the pattern text is unchanged.
        r'(bn|ln|gn)(\d+)?.(weight|bias)': dict(weight_decay=0.),
        'norm': dict(weight_decay=0.),
        'bias': dict(weight_decay=0.),
        'absolute_pos_embed': dict(weight_decay=0.),
        'relative_position_bias_table': dict(weight_decay=0.),
    })

# gradient accumulation interval
update_interval = 1  # total: 1 x bs100 x 1 accumulates = bs100

# fp16 (mixed precision) settings; disabled via `use_fp16`
use_fp16 = False
fp16 = dict(
    type='mmcv',
    loss_scale='dynamic',
)
optimizer_config = dict(
    grad_clip=dict(max_norm=5.0),
    update_interval=update_interval,
)

# learning policy: cosine annealing with linear warmup
lr_config = dict(
    policy='CosineAnnealing',
    by_epoch=False,
    min_lr=1e-6,
    warmup='linear',
    warmup_iters=20,
    warmup_by_epoch=True,
    warmup_ratio=1e-5,
)

# runtime settings
runner = dict(
    type='EpochBasedRunner',
    max_epochs=200,
)
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
end_momentum=0.999999,
adjust_scope=[0.1, 1.0],
warming_up="constant",
interval=1),
update_interval=1),
dict(type='SAVEHook',
iter_per_epoch=500,
save_interval=12500, # plot every 500 x 25 ep
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
end_momentum=0.999999,
adjust_scope=[0.1, 1.0],
warming_up="constant",
interval=1),
update_interval=1),
dict(type='SAVEHook',
iter_per_epoch=500,
save_interval=12500, # plot every 500 x 25 ep
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
end_momentum=0.999999,
adjust_scope=[0.1, 1.0],
warming_up="constant",
interval=1),
update_interval=1),
dict(type='SAVEHook',
iter_per_epoch=500,
save_interval=12500, # plot every 500 x 25 ep
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
end_momentum=0.999999,
adjust_scope=[0.1, 1.0],
warming_up="constant",
interval=1),
update_interval=1),
dict(type='SAVEHook',
iter_per_epoch=500,
save_interval=12500, # plot every 500 x 25 ep
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
adjust_scope=[0.25, 1.0],
warming_up="constant",
update_interval=update_interval,
interval=1)
)
]

# optimizer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
adjust_scope=[0.25, 1.0],
warming_up="constant",
update_interval=update_interval,
interval=1)
)
]

# optimizer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
adjust_scope=[0.25, 1.0],
warming_up="constant",
update_interval=update_interval,
interval=1)
)
]

# optimizer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
adjust_scope=[0.25, 1.0],
warming_up="constant",
update_interval=update_interval,
interval=1)
)
]

# optimizer
Expand Down
Loading

0 comments on commit a710312

Please sign in to comment.