From 8619068f29f7c2e2fcb052d840abbcfdc5c362af Mon Sep 17 00:00:00 2001 From: Bo Zhang Date: Mon, 17 May 2021 19:00:03 +0800 Subject: [PATCH 1/4] feat: add other effv2 models --- README.md | 13 ++++- effnetv2.py | 127 +++++++++++++++++++++++++++++++++++++++++++----- main.py | 42 ++++++++++++++++ parse_config.py | 59 ++++++++++++++++++++++ 4 files changed, 229 insertions(+), 12 deletions(-) create mode 100644 main.py create mode 100644 parse_config.py diff --git a/README.md b/README.md index 1f7fb20..073a390 100644 --- a/README.md +++ b/README.md @@ -14,10 +14,21 @@ PyTorch 1.7+ is required to support [nn.SiLU](https://pytorch.org/docs/master/ge | Architecture | # Parameters | FLOPs | Top-1 Acc. (%) | | ----------------- | ------------ | ------ | -------------------------- | -| EfficientNetV2-S | 24M | 8.8B | | +| EfficientNetV2-S | 22.103832M | 23202.270720M | | +| EfficientNetV2-M | 55.300016M | 43557.531136M | +| EfficientNetV2-L | 119.355792M | 98599.022080M| +| EfficientNetV2-XL | 208.960328M | 144211.693568M| +| EfficientNetV2-B0 | 7.780248M | 5952.832768M | | +| EfficientNetV2-B1 | 9.009872M | 6685.624320M | | +| EfficientNetV2-B2 | 10.749136M | 9067.325440M | | +| EfficientNetV2-B3 | 14.461720M | 11929.994368M | | + +* Flops are all measured on input (224, 224). More model definitions are pending for architectural details from the authors. + + Stay tuned for ImageNet pre-trained weights. # Acknowledgement diff --git a/effnetv2.py b/effnetv2.py index 36c399e..135ddaf 100644 --- a/effnetv2.py +++ b/effnetv2.py @@ -10,7 +10,8 @@ import torch.nn as nn import math -__all__ = ['effnetv2_s'] +__all__ = ['effnetv2_s', 'effnetv2_m', 'effnetv2_l', 'effnetv2_xl', + 'effnetv2_base', 'effnetv2_b0', 'effnetv2_b1', 'effnetv2_b2', 'effnetv2_b3'] def _make_divisible(v, divisor, min_value=None): @@ -119,18 +120,10 @@ def forward(self, x): class EffNetV2(nn.Module): - def __init__(self, num_classes=1000, width_mult=1.): + def __init__(self, num_classes=1000, width_mult=1., cfgs=None): super(EffNetV2, self).__init__() # setting of inverted residual blocks - self.cfgs = [ - # t, c, n, s, SE - [1, 24, 2, 1, 0], - [4, 48, 4, 2, 0], - [4, 64, 4, 2, 0], - [4, 128, 6, 2, 1], - [6, 160, 9, 1, 1], - [6, 272, 15, 2, 1], - ] + self.cfgs = cfgs # building first layer input_channel = _make_divisible(24 * width_mult, 8) @@ -173,9 +166,121 @@ def _initialize_weights(self): m.weight.data.normal_(0, 0.001) m.bias.data.zero_() + def effnetv2_s(**kwargs): """ Constructs a EfficientNet V2 model """ + settings = {"cfgs": [ + # t, c, n, s, SE + [1, 24, 2, 1, 0], + [4, 48, 4, 2, 0], + [4, 64, 4, 2, 0], + [4, 128, 6, 2, 1], + [6, 160, 9, 1, 1], + [6, 256, 15, 2, 1], + ] + } + kwargs.update(settings) return EffNetV2(**kwargs) +def effnetv2_m(**kwargs): + """ + Constructs a EfficientNet V2 model + """ + settings = {"cfgs": [ + # t, c, n, s, SE + [1, 24, 3, 1, 0], + [4, 48, 5, 2, 0], + [4, 80, 5, 2, 0], + [4, 160, 7, 2, 1], + [6, 176, 14, 1, 1], + [6, 304, 18, 2, 1], + [6, 512, 5, 1, 1], + ] + } + kwargs.update(settings) + return EffNetV2(**kwargs) + + +def effnetv2_l(**kwargs): + """ + Constructs a EfficientNet V2 model + """ + settings = {"cfgs": [ + # t, c, n, s, SE + [1, 32, 4, 1, 0], + [4, 64, 7, 2, 0], + [4, 96, 7, 2, 0], + [4, 192, 10, 2, 1], + [6, 224, 19, 1, 1], + [6, 384, 25, 2, 1], + [6, 640, 7, 1, 1], + ] + } + kwargs.update(settings) + return EffNetV2(**kwargs) + + +def effnetv2_xl(**kwargs): + """ + Constructs a EfficientNet V2 model + """ + settings = {"cfgs": [ + # t, c, n, s, SE + [1, 32, 4, 1, 0], + [4, 64, 8, 
2, 0], + [4, 96, 8, 2, 0], + [4, 192, 16, 2, 1], + [6, 256, 24, 1, 1], + [6, 512, 32, 2, 1], + [6, 640, 8, 1, 1], + ] + } + kwargs.update(settings) + return EffNetV2(**kwargs) + + +def effnetv2_base(**kwargs): + """ + Constructs a EfficientNet V2 model + """ + settings = {"cfgs": [ + # t, c, n, s, SE + [1, 16, 1, 1, 0], + [4, 32, 2, 2, 0], + [4, 48, 2, 2, 0], + [4, 96, 3, 2, 1], + [6, 112, 5, 1, 1], + [6, 192, 8, 2, 1], + ] + } + kwargs.update(settings) + return EffNetV2(**kwargs) + + +effnetv2_b0 = effnetv2_base + + +def effnetv2_b1(**kwargs): + settings = { + "width_mult": 1.1, + } + kwargs.update(settings) + return effnetv2_base(**kwargs) + + +def effnetv2_b2(**kwargs): + settings = { + "width_mult": 1.2, + } + kwargs.update(settings) + return effnetv2_base(**kwargs) + + +def effnetv2_b3(**kwargs): + settings = { + "width_mult": 1.4, + } + kwargs.update(settings) + return effnetv2_base(**kwargs) diff --git a/main.py b/main.py new file mode 100644 index 0000000..bc46e92 --- /dev/null +++ b/main.py @@ -0,0 +1,42 @@ +import torch +import argparse +from thop import profile + +from effnetv2 import * + +# for mac duplicate lib bug +import os +os.environ['KMP_DUPLICATE_LIB_OK']='True' + +def get_args_parser(): + parser = argparse.ArgumentParser('PVT training and evaluation script', add_help=False) + # Model parameters + parser.add_argument('-m', '--model', default='resnet50', type=str, metavar='MODEL', + help='Name of model to train') + parser.add_argument('-bs', '--batch_size', default=8, type=int, help='set batch size') + parser.add_argument('-e', '--export', action='store_true', help='convert to onnx models') + return parser + + +if __name__ == '__main__': + parser = argparse.ArgumentParser('generate onnx timm models', parents=[get_args_parser()]) + args = parser.parse_args() + + if 'effnetv2' in args.model: + model = eval(args.model)() + + x = torch.randn(args.batch_size, 3, 224, 224) + flops, params = profile(model, inputs=(x,), verbose=False) + print("flops = %fM" % (flops / 1e6, )) + print("param size = %fM" % (params / 1e6, )) + + if args.export: + print("exporting....") + model.eval() + torch.onnx.export(model, x, args.model+"_bs"+str(args.batch_size)+".onnx", + input_names=['input'], + output_names=['output'], + verbose=True, + opset_version=11, + operator_export_type=torch.onnx.OperatorExportTypes.ONNX) + print("exported!") diff --git a/parse_config.py b/parse_config.py new file mode 100644 index 0000000..d95b7da --- /dev/null +++ b/parse_config.py @@ -0,0 +1,59 @@ + +#################### EfficientNet V2 configs #################### +v2_base_block = [ # The baseline config for v2 models. 
+ 'r1_k3_s1_e1_i32_o16_c1', + 'r2_k3_s2_e4_i16_o32_c1', + 'r2_k3_s2_e4_i32_o48_c1', + 'r3_k3_s2_e4_i48_o96_se0.25', + 'r5_k3_s1_e6_i96_o112_se0.25', + 'r8_k3_s2_e6_i112_o192_se0.25', +] + + +v2_s_block = [ # about base * (width1.4, depth1.8) + 'r2_k3_s1_e1_i24_o24_c1', + 'r4_k3_s2_e4_i24_o48_c1', + 'r4_k3_s2_e4_i48_o64_c1', + 'r6_k3_s2_e4_i64_o128_se0.25', + 'r9_k3_s1_e6_i128_o160_se0.25', + 'r15_k3_s2_e6_i160_o256_se0.25', +] + + +v2_m_block = [ # about base * (width1.6, depth2.2) + 'r3_k3_s1_e1_i24_o24_c1', + 'r5_k3_s2_e4_i24_o48_c1', + 'r5_k3_s2_e4_i48_o80_c1', + 'r7_k3_s2_e4_i80_o160_se0.25', + 'r14_k3_s1_e6_i160_o176_se0.25', + 'r18_k3_s2_e6_i176_o304_se0.25', + 'r5_k3_s1_e6_i304_o512_se0.25', +] + + +v2_l_block = [ # about base * (width2.0, depth3.1) + 'r4_k3_s1_e1_i32_o32_c1', + 'r7_k3_s2_e4_i32_o64_c1', + 'r7_k3_s2_e4_i64_o96_c1', + 'r10_k3_s2_e4_i96_o192_se0.25', + 'r19_k3_s1_e6_i192_o224_se0.25', + 'r25_k3_s2_e6_i224_o384_se0.25', + 'r7_k3_s1_e6_i384_o640_se0.25', +] + +v2_xl_block = [ # only for 21k pretraining. + 'r4_k3_s1_e1_i32_o32_c1', + 'r8_k3_s2_e4_i32_o64_c1', + 'r8_k3_s2_e4_i64_o96_c1', + 'r16_k3_s2_e4_i96_o192_se0.25', + 'r24_k3_s1_e6_i192_o256_se0.25', + 'r32_k3_s2_e6_i256_o512_se0.25', + 'r8_k3_s1_e6_i512_o640_se0.25', +] + +for blk in [v2_base_block, v2_s_block, v2_m_block, v2_l_block, v2_xl_block]: + cfgs = [] + for k in blk: + keys = k.split('_') + cfgs.append([int(keys[3][1:]), int(keys[5][1:]), int(keys[0][1:]), int(keys[2][1:]), 1 if 'se' in keys[-1] else 0]) + print(cfgs) From 6ed61d1e0e4e5e5fc506cb307c14428883a5cdf3 Mon Sep 17 00:00:00 2001 From: Bo Zhang Date: Mon, 17 May 2021 19:15:12 +0800 Subject: [PATCH 2/4] fix: merge conflict --- effnetv2.py | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/effnetv2.py b/effnetv2.py index 856de2f..b7a8a02 100644 --- a/effnetv2.py +++ b/effnetv2.py @@ -201,34 +201,6 @@ def effnetv2_m(**kwargs): } kwargs.update(settings) return EffNetV2(**kwargs) -======= - cfgs = [ - # t, c, n, s, SE - [1, 24, 2, 1, 0], - [4, 48, 4, 2, 0], - [4, 64, 4, 2, 0], - [4, 128, 6, 2, 1], - [6, 160, 9, 1, 1], - [6, 272, 15, 2, 1], - ] - return EffNetV2(cfgs, **kwargs) - - -def effnetv2_m(**kwargs): - """ - Constructs a EfficientNetV2-M model - """ - cfgs = [ - # t, c, n, s, SE - [1, 24, 3, 1, 0], - [4, 48, 5, 2, 0], - [4, 80, 5, 2, 0], - [4, 160, 7, 2, 1], - [6, 176, 14, 1, 1], - [6, 304, 18, 2, 1], - [6, 512, 5, 1, 1], - ] - return EffNetV2(cfgs, **kwargs) def effnetv2_l(**kwargs): From bedba35710cd4c928188ad5c3e17dfa6b7a19c47 Mon Sep 17 00:00:00 2001 From: Bo Zhang Date: Mon, 17 May 2021 19:16:20 +0800 Subject: [PATCH 3/4] fix: readme --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 88d6070..a212b57 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,6 @@ Reproduction of EfficientNet V2 architecture as described in [EfficientNetV2: Sm | EfficientNetV2-B2 | 10.74M | 9.0G @ 224 | | | EfficientNetV2-B3 | 14.46M | 11.9G @ 224 | | -* Flops are all measured on input (224, 224). - Stay tuned for ImageNet pre-trained weights. ## Acknowledgement From c67470ab9e5a65f388bbb525b2765b1f3ef138b3 Mon Sep 17 00:00:00 2001 From: Bo Zhang Date: Sun, 20 Feb 2022 16:57:26 +0800 Subject: [PATCH 4/4] fix: correct b0-b3 settings w.r.t. timm impl. 
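
Compared to the previous revision, the B0-B3 factories now follow the timm
derivation: each variant pairs a width multiplier with a depth multiplier
instead of scaling width alone, the stem is widened to 32 channels, and the
head width comes from round_channels(1280) with the variant's width
multiplier. A minimal sketch of the scaling rule, assuming the base config
format used in effnetv2.py (the helper name scale_cfgs is illustrative and
not part of this diff; per-stage channel rounding still happens inside
EffNetV2 via _make_divisible):

    import math

    # (width_mult, depth_multiplier) pairs used by the factories in this patch
    B_VARIANTS = {"b0": (1.0, 1.0), "b1": (1.0, 1.1),
                  "b2": (1.1, 1.2), "b3": (1.2, 1.4)}

    def scale_cfgs(base_cfgs, depth_multiplier):
        # round the per-stage repeat count n up; t/c/s/SE are untouched here,
        # width_mult is applied later when EffNetV2 builds each stage
        return [[t, c, math.ceil(n * depth_multiplier), s, se]
                for t, c, n, s, se in base_cfgs]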
--- README.md | 16 +++++++------- effnetv2.py | 64 ++++++++++++++++++++++++++++++++++++----------------- main.py | 39 +++++++++++++++++--------------- 3 files changed, 73 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index a212b57..5746f69 100644 --- a/README.md +++ b/README.md @@ -10,14 +10,14 @@ Reproduction of EfficientNet V2 architecture as described in [EfficientNetV2: Sm | Architecture | # Parameters | FLOPs | Top-1 Acc. (%) | | ----------------- | ------------ | ------ | -------------------------- | -| EfficientNetV2-S | 24.12M | 8.64G @ 384 | | -| EfficientNetV2-M | 55.30M | 24.74G @ 480 | | -| EfficientNetV2-L | 119.36M | 56.13G @ 384 | | -| EfficientNetV2-XL | 208.96M | 93.41G @ 512 | | -| EfficientNetV2-B0 | 7.78M | 5.9G @ 224 | | -| EfficientNetV2-B1 | 9.00M | 6.6G @ 224 | | -| EfficientNetV2-B2 | 10.74M | 9.0G @ 224 | | -| EfficientNetV2-B3 | 14.46M | 11.9G @ 224 | | +| EfficientNetV2-B0 | 7.17M | 0.79G @ 224 | | +| EfficientNetV2-B1 | 8.18M | 1.09G @ 224 | | +| EfficientNetV2-B2 | 10.37M | 1.25G @ 224 | | +| EfficientNetV2-B3 | 14.69M | 1.83G @ 224 | | +| EfficientNetV2-S | 21.10M | 2.90G @ 224 | | +| EfficientNetV2-M | 55.30M | 5.44G @ 224 | | +| EfficientNetV2-L | 119.36M | 12.32G @ 224 | | +| EfficientNetV2-XL | 208.96M | 18.02G @ 224 | | Stay tuned for ImageNet pre-trained weights. diff --git a/effnetv2.py b/effnetv2.py index b7a8a02..480ff32 100644 --- a/effnetv2.py +++ b/effnetv2.py @@ -9,12 +9,13 @@ import torch import torch.nn as nn import math +from functools import partial __all__ = ['effnetv2_s', 'effnetv2_m', 'effnetv2_l', 'effnetv2_xl', 'effnetv2_base', 'effnetv2_b0', 'effnetv2_b1', 'effnetv2_b2', 'effnetv2_b3'] -def _make_divisible(v, divisor, min_value=None): +def _make_divisible(v, divisor, min_value=None, round_limit=.9): """ This function is taken from the original tf repo. It ensures that all layers have a channel number that is divisible by 8 @@ -29,11 +30,18 @@ def _make_divisible(v, divisor, min_value=None): min_value = divisor new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) # Make sure that round down does not go down by more than 10%. 
- if new_v < 0.9 * v: + if new_v < round_limit * v: new_v += divisor return new_v +def round_channels(channels, multiplier=1.0, divisor=8, channel_min=None, round_limit=0.9): + """Round number of filters based on depth multiplier.""" + if not multiplier: + return channels + return _make_divisible(channels * multiplier, divisor, min_value=channel_min, round_limit=round_limit) + + # SiLU (Swish) activation function if hasattr(nn, 'SiLU'): SiLU = nn.SiLU @@ -120,13 +128,13 @@ def forward(self, x): class EffNetV2(nn.Module): - def __init__(self, num_classes=1000, width_mult=1., cfgs=None): + def __init__(self, num_classes=1000, width_mult=1., cfgs=None, stem_size=24, num_feature=1792): super(EffNetV2, self).__init__() # setting of inverted residual blocks self.cfgs = cfgs # building first layer - input_channel = _make_divisible(24 * width_mult, 8) + input_channel = _make_divisible(stem_size * width_mult, 8) layers = [conv_3x3_bn(3, input_channel, 2)] # building inverted residual blocks block = MBConv @@ -137,7 +145,7 @@ def __init__(self, num_classes=1000, width_mult=1., cfgs=None): input_channel = output_channel self.features = nn.Sequential(*layers) # building last several layers - output_channel = _make_divisible(1792 * width_mult, 8) if width_mult > 1.0 else 1792 + output_channel = _make_divisible(num_feature * width_mult, 8) if width_mult > 1.0 else num_feature self.conv = conv_1x1_bn(input_channel, output_channel) self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) self.classifier = nn.Linear(output_channel, num_classes) @@ -186,7 +194,7 @@ def effnetv2_s(**kwargs): def effnetv2_m(**kwargs): """ - Constructs a EfficientNet V2 model + Constructs a EfficientNetV2-M model """ settings = {"cfgs": [ # t, c, n, s, SE @@ -205,7 +213,7 @@ def effnetv2_m(**kwargs): def effnetv2_l(**kwargs): """ - Constructs a EfficientNet V2 model + Constructs a EfficientNetV2-L model """ settings = {"cfgs": [ # t, c, n, s, SE @@ -224,7 +232,7 @@ def effnetv2_l(**kwargs): def effnetv2_xl(**kwargs): """ - Constructs a EfficientNet V2 model + Constructs a EfficientNetV2-XL model """ settings = {"cfgs": [ # t, c, n, s, SE @@ -243,18 +251,31 @@ def effnetv2_xl(**kwargs): def effnetv2_base(**kwargs): """ - Constructs a EfficientNet V2 model + Constructs a EfficientNetV2-Base model """ + width_mult = kwargs.pop("width_mult", 1.0) + round_chs_fn = partial(round_channels, multiplier=width_mult, round_limit=0.) 
+ num_feature = round_chs_fn(1280) + + depth_multiplier = kwargs.pop("depth_multiplier", 1.0) + settings = {"cfgs": [ - # t, c, n, s, SE - [1, 16, 1, 1, 0], - [4, 32, 2, 2, 0], - [4, 48, 2, 2, 0], - [4, 96, 3, 2, 1], - [6, 112, 5, 1, 1], - [6, 192, 8, 2, 1], - ] + # t, c, n, s, SE + [1, 16, 1, 1, 0], + [4, 32, 2, 2, 0], + [4, 48, 2, 2, 0], + [4, 96, 3, 2, 1], + [6, 112, 5, 1, 1], + [6, 192, 8, 2, 1], + ], + "stem_size": 32, + "num_feature": num_feature, + "width_mult": width_mult } + # scale depth + for i in range(len(settings["cfgs"])): + settings["cfgs"][i][2] = int(math.ceil(depth_multiplier*settings["cfgs"][i][2])) + kwargs.update(settings) return EffNetV2(**kwargs) @@ -264,7 +285,8 @@ def effnetv2_base(**kwargs): def effnetv2_b1(**kwargs): settings = { - "width_mult": 1.1, + "depth_multiplier": 1.1, + "width_mult": 1.0, } kwargs.update(settings) return effnetv2_base(**kwargs) @@ -272,7 +294,8 @@ def effnetv2_b1(**kwargs): def effnetv2_b2(**kwargs): settings = { - "width_mult": 1.2, + "depth_multiplier": 1.2, + "width_mult": 1.1, } kwargs.update(settings) return effnetv2_base(**kwargs) @@ -280,7 +303,8 @@ def effnetv2_b2(**kwargs): def effnetv2_b3(**kwargs): settings = { - "width_mult": 1.4, + "depth_multiplier": 1.4, + "width_mult": 1.2, } kwargs.update(settings) return effnetv2_base(**kwargs) diff --git a/main.py b/main.py index bc46e92..7097232 100644 --- a/main.py +++ b/main.py @@ -22,21 +22,24 @@ def get_args_parser(): parser = argparse.ArgumentParser('generate onnx timm models', parents=[get_args_parser()]) args = parser.parse_args() - if 'effnetv2' in args.model: - model = eval(args.model)() - - x = torch.randn(args.batch_size, 3, 224, 224) - flops, params = profile(model, inputs=(x,), verbose=False) - print("flops = %fM" % (flops / 1e6, )) - print("param size = %fM" % (params / 1e6, )) - - if args.export: - print("exporting....") - model.eval() - torch.onnx.export(model, x, args.model+"_bs"+str(args.batch_size)+".onnx", - input_names=['input'], - output_names=['output'], - verbose=True, - opset_version=11, - operator_export_type=torch.onnx.OperatorExportTypes.ONNX) - print("exported!") + model_names = ['s', 'm', 'l', 'xl', 'b0', 'b1', 'b2', 'b3'] + for m in model_names: + model_name = "effnetv2_" + m + model = eval(model_name)() + print(model_name) + x = torch.randn(1, 3, 224, 224) + flops, params = profile(model, inputs=(x,), verbose=False) + print("flops = %fM" % (flops / 1e6, )) + print("param size = %fM" % (params / 1e6, )) + + if args.export: + print("exporting....") + model.eval() + x = torch.randn(args.batch_size, 3, 224, 224) + torch.onnx.export(model, x, args.model+"_bs"+str(args.batch_size)+".onnx", + input_names=['input'], + output_names=['output'], + verbose=True, + opset_version=11, + operator_export_type=torch.onnx.OperatorExportTypes.ONNX) + print("exported!")
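
For a quick sanity check of the patched factories, here is a minimal usage
sketch (it assumes the effnetv2.py from this series is importable; thop is
optional and only used to mirror the FLOP/parameter counting in main.py):

    import torch
    from effnetv2 import effnetv2_b0, effnetv2_s

    # build one of the B variants and run a dummy forward pass
    model = effnetv2_b0(num_classes=1000)
    model.eval()
    x = torch.randn(1, 3, 224, 224)
    with torch.no_grad():
        logits = model(x)   # shape: (1, 1000)

    # optional: parameter / FLOP count, as in main.py
    try:
        from thop import profile
        flops, params = profile(effnetv2_s(), inputs=(torch.randn(1, 3, 224, 224),),
                                verbose=False)
        print("flops = %.2fG, params = %.2fM" % (flops / 1e9, params / 1e6))
    except ImportError:
        pass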