tresnet.py

import logging
from copy import deepcopy
from functools import wraps, partial
import torch
import torch.nn as nn
from torch.hub import load_state_dict_from_url
from pytorch_tools.models.resnet import ResNet
from pytorch_tools.modules import TBasicBlock, TBottleneck
from pytorch_tools.modules import FastGlobalAvgPool2d, BlurPool
from pytorch_tools.modules import SpaceToDepth
from pytorch_tools.modules.residual import conv1x1, conv3x3
from pytorch_tools.modules import bn_from_name
from pytorch_tools.modules import ABN
from pytorch_tools.utils.misc import add_docs_for
from pytorch_tools.utils.misc import repeat_channels
# avoid overwriting doc string
wraps = partial(wraps, assigned=("__module__", "__name__", "__qualname__", "__annotations__"))
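
# A minimal illustration (hypothetical `base`/`variant` names) of why `assigned`
# is restricted above: with plain functools.wraps, "__doc__" would also be copied,
# clobbering the docstring that @add_docs_for attaches to the constructor
# functions at the bottom of this file.
#
#   def base():
#       """base doc"""
#   @wraps(base)
#   def variant():
#       """variant doc"""
#   assert variant.__doc__ == "variant doc"  # preserved: "__doc__" not in `assigned`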

class TResNet(ResNet):
    """TResNet-M / TResNet-L / TResNet-XL

    Ref:
        * TResNet paper: https://arxiv.org/abs/2003.13630

    Args:
        layers (List[int]):
            Number of layers in each block.
        pretrained (str, optional):
            If not None, returns a model pre-trained on the given dataset. `imagenet` is available
            for every model, but some models have more. Check the code to find out.
        num_classes (int):
            Number of classification classes. Defaults to 1000.
        in_channels (int):
            Number of input (color) channels. Defaults to 3.
        width_factor (float):
            Stem width is 64 * width_factor. Needed to build the larger models.
        output_stride (int):
            One of 8, 16, 32. Applies a dilation strategy to the pretrained ResNet. Typically used in
            semantic segmentation. Defaults to 32.
            NOTE: Don't use this arg together with `antialias` and `pretrained`; it may produce weird results.
        norm_layer (str):
            Normalization layer to use. One of 'abn', 'inplaceabn'. The inplace version lowers memory footprint
            but increases backward time. Defaults to 'inplaceabn'.
        norm_act (str):
            Activation for the normalization layer. It's recommended to use `leaky_relu` with `inplaceabn`.
        encoder (bool):
            Flag to overwrite the forward pass to return 5 tensors with different resolutions. Defaults to False.
            NOTE: TResNet's first features have resolution 4x smaller than the input, not 2x as in all
            other models, so it CAN'T be used as an encoder in Unet and Linknet models.
        drop_rate (float):
            Dropout probability before the classifier, for training. Defaults to 0.
        drop_connect_rate (float):
            Drop rate for StochasticDepth. Randomly removes samples in each block. Used as regularization
            during training. Ref: https://arxiv.org/abs/1603.09382
    """
    def __init__(
        self,
        layers=None,
        pretrained=None,  # not used; here for a proper signature
        num_classes=1000,
        in_channels=3,
        width_factor=1.0,
        output_stride=32,
        norm_layer="inplaceabn",
        norm_act="leaky_relu",
        encoder=False,
        drop_rate=0.0,
        drop_connect_rate=0.0,
    ):
        nn.Module.__init__(self)
        stem_width = int(64 * width_factor)
        norm_layer = bn_from_name(norm_layer)
        self.inplanes = stem_width
        self.num_classes = num_classes
        self.groups = 1  # not really used but needed inside _make_layer
        self.base_width = 64  # used inside _make_layer
        self.norm_act = norm_act
        self.block_idx = 0
        self.num_blocks = sum(layers)
        self.drop_connect_rate = drop_connect_rate

        self._make_stem("space2depth", stem_width, in_channels, norm_layer, norm_act)
        if output_stride not in [8, 16, 32]:
            raise ValueError("Output stride should be in [8, 16, 32]")
        # TODO add OS later
        # if output_stride == 8:
        #     stride_3, stride_4, dilation_3, dilation_4 = 1, 1, 2, 4
        # elif output_stride == 16:
        #     stride_3, stride_4, dilation_3, dilation_4 = 2, 1, 1, 2
        # elif output_stride == 32:
        stride_3, stride_4, dilation_3, dilation_4 = 2, 2, 1, 1

        largs = dict(attn_type="se", norm_layer=norm_layer, norm_act=norm_act, antialias=True)
        self.block = TBasicBlock
        self.expansion = TBasicBlock.expansion
        self.layer1 = self._make_layer(stem_width, layers[0], stride=1, **largs)
        self.layer2 = self._make_layer(stem_width * 2, layers[1], stride=2, **largs)

        self.block = TBottleneck  # first 2 layers - Basic, last 2 - Bottleneck
        self.expansion = TBottleneck.expansion
        self.layer3 = self._make_layer(stem_width * 4, layers[2], stride=stride_3, dilation=dilation_3, **largs)
        largs.update(attn_type=None)  # no SE in last layer
        self.layer4 = self._make_layer(stem_width * 8, layers[3], stride=stride_4, dilation=dilation_4, **largs)

        self.global_pool = FastGlobalAvgPool2d(flatten=True)
        self.num_features = stem_width * 8 * self.expansion
        self.encoder = encoder
        if not encoder:
            self.dropout = nn.Dropout(p=drop_rate, inplace=True)
            self.last_linear = nn.Linear(self.num_features, num_classes)
        else:
            self.forward = self.encoder_features
        self._initialize_weights(init_bn0=True)

    def load_state_dict(self, state_dict, **kwargs):
        if self.encoder:
            state_dict.pop("last_linear.weight")
            state_dict.pop("last_linear.bias")
        nn.Module.load_state_dict(self, state_dict, **kwargs)
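
# A minimal usage sketch of the encoder mode (hypothetical, not part of the
# original file): with `encoder=True` the classifier head is never created, so
# `load_state_dict` above drops the `last_linear.*` tensors before delegating
# to nn.Module.
#
#   model = TResNet(layers=[3, 4, 11, 3], encoder=True)
#   feats = model(torch.randn(1, 3, 224, 224))  # forward == encoder_features
#   # `feats` is 5 tensors; the first is already 4x smaller than the input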

# fmt: off
# images should be normalized to [0, 1]
PRETRAIN_SETTINGS = {
    "input_space": "RGB",
    "input_size": [3, 448, 448],
    "input_range": [0, 1],
    "mean": [0.0, 0.0, 0.0],
    "std": [1.0, 1.0, 1.0],
    "num_classes": 1000,
}

# For each model there are weights trained on 224 crops from ImageNet and weights
# finetuned on 448x448 crops. The latter are used as default because they work
# better for finetuning on large image sizes.
CFGS = {
    "tresnetm": {
        "default": {"params": {"layers": [3, 4, 11, 3]}, **PRETRAIN_SETTINGS},
        # Acc@1 81.712 Acc@5 95.502 on 448 crops and proper normalization
        "imagenet": {"url": "https://github.com/bonlime/pytorch-tools/releases/download/v0.1.2/patched_tresnet_m_448-a236aa30.pth"},
        # Acc@1 80.644 Acc@5 94.762 on 224 crops
        "imagenet2": {
            "url": "https://github.com/bonlime/pytorch-tools/releases/download/v0.1.2/patched_tresnet_m-80fe4f81.pth",
            "input_size": [3, 224, 224],
        },
    },
    "tresnetl": {
        "default": {"params": {"layers": [4, 5, 18, 3], "width_factor": 1.2}, **PRETRAIN_SETTINGS},
        # Acc@1 82.266 Acc@5 95.938
        "imagenet": {"url": "https://github.com/bonlime/pytorch-tools/releases/download/v0.1.2/patched_tresnet_l_448-9401272c.pth"},
        "imagenet2": {
            "url": "https://github.com/bonlime/pytorch-tools/releases/download/v0.1.2/patched_tresnet_l-f37fa24a.pth",
            "input_size": [3, 224, 224],
        },
    },
    "tresnetxl": {
        "default": {"params": {"layers": [4, 5, 24, 3], "width_factor": 1.3}, **PRETRAIN_SETTINGS},
        # Acc@1 83.046 Acc@5 96.172
        "imagenet": {"url": "https://github.com/bonlime/pytorch-tools/releases/download/v0.1.2/patched_tresnet_xl_448-11841266.pth"},
        # Acc@1 81.988 Acc@5 95.896
        "imagenet2": {
            "url": "https://github.com/bonlime/pytorch-tools/releases/download/v0.1.2/patched_tresnet_xl-493cb618.pth",
            "input_size": [3, 224, 224],
        },
    },
}
# fmt: on
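
# A hypothetical preprocessing sketch matching PRETRAIN_SETTINGS above: with
# mean=0 and std=1 the pretrained weights expect raw [0, 1] inputs, i.e. no
# ImageNet mean/std normalization.
#
#   import torchvision.transforms as T
#   preprocess = T.Compose([T.Resize((448, 448)), T.ToTensor()])  # ToTensor scales to [0, 1]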

def patch_bn(module):
    """changes weights from InplaceABN to be compatible with usual ABN"""
    if isinstance(module, ABN):
        module.weight = nn.Parameter(module.weight.abs() + 1e-5)
    for m in module.children():
        patch_bn(m)
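
# Why patch_bn exists (my understanding, not stated in the original file): the
# inplace-abn implementation effectively uses |gamma| + eps as the scale in its
# forward pass, so applying the same abs() + 1e-5 to the stored weights makes a
# plain ABN reproduce what InplaceABN computed. A hypothetical sketch:
#
#   model = TResNet(layers=[3, 4, 11, 3], norm_layer="abn")
#   model.load_state_dict(torch.load("weights.pth"))  # weights trained with inplaceabn
#   patch_bn(model)  # now plain ABN matches the InplaceABN activations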

def _resnet(arch, pretrained=None, **kwargs):
    cfgs = deepcopy(CFGS)
    cfg_settings = cfgs[arch]["default"]
    cfg_params = cfg_settings.pop("params")
    if pretrained:
        pretrained_settings = cfgs[arch][pretrained]
        pretrained_params = pretrained_settings.pop("params", {})
        cfg_settings.update(pretrained_settings)
        cfg_params.update(pretrained_params)
        common_args = set(cfg_params.keys()).intersection(set(kwargs.keys()))
        if common_args:
            logging.warning(f"Args {common_args} are going to be overwritten by default params for {pretrained} weights")
    kwargs.update(cfg_params)
    model = TResNet(**kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(cfgs[arch][pretrained]["url"], check_hash=True)
        kwargs_cls = kwargs.get("num_classes", None)
        if kwargs_cls and kwargs_cls != cfg_settings["num_classes"]:
            logging.warning(
                "Using model pretrained for {} classes with {} classes. Last layer is initialized randomly".format(
                    cfg_settings["num_classes"], kwargs_cls
                )
            )
            # if there is last_linear in state_dict, it's going to be overwritten
            state_dict["last_linear.weight"] = model.state_dict()["last_linear.weight"]
            state_dict["last_linear.bias"] = model.state_dict()["last_linear.bias"]
        if kwargs.get("in_channels", 3) != 3:  # support pretrained for custom input channels
            state_dict["conv1.1.weight"] = repeat_channels(
                state_dict["conv1.1.weight"], kwargs["in_channels"] * 16, 3 * 16
            )
        model.load_state_dict(state_dict)
        patch_bn(model)
    setattr(model, "pretrained_settings", cfg_settings)
    return model
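
# A hypothetical call showing the override path above: requesting a different
# `num_classes` keeps the pretrained backbone but leaves `last_linear` randomly
# initialized (a warning is logged).
#
#   model = _resnet("tresnetm", pretrained="imagenet", num_classes=10)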

@wraps(TResNet)
@add_docs_for(TResNet)
def tresnetm(**kwargs):
    r"""Constructs a TResnetM model."""
    return _resnet("tresnetm", **kwargs)


@wraps(TResNet)
@add_docs_for(TResNet)
def tresnetl(**kwargs):
    r"""Constructs a TResnetL model."""
    return _resnet("tresnetl", **kwargs)


@wraps(TResNet)
@add_docs_for(TResNet)
def tresnetxl(**kwargs):
    r"""Constructs a TResnetXL model."""
    return _resnet("tresnetxl", **kwargs)
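
# A minimal end-to-end sketch (assumes the pretrained weight download succeeds):
if __name__ == "__main__":
    model = tresnetm(pretrained="imagenet")
    model.eval()
    with torch.no_grad():
        # inputs only need to be scaled to [0, 1]; see PRETRAIN_SETTINGS above
        logits = model(torch.rand(1, 3, 448, 448))
    print(logits.shape)  # torch.Size([1, 1000])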