Add files

hysts · Oct 19, 2021 · 285675b · 285675b
1 parent 31c19f8
commit 285675b
Show file tree

Hide file tree

Showing 9 changed files with 563 additions and 0 deletions.
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1,4 @@
+include LICENSE
+include README.md
+include requirements.txt
+recursive-include anime_face_detector/configs *.py
diff --git a/README.md b/README.md
@@ -1,2 +1,5 @@
 # Anime Face Detector
 
+This is an anime face detector using
+[mmdetection](https://github.com/open-mmlab/mmdetection)
+and [mmpose](https://github.com/open-mmlab/mmpose).
diff --git a/anime_face_detector/__init__.py b/anime_face_detector/__init__.py
@@ -0,0 +1,14 @@
+import pathlib
+
+from .detector import LandmarkDetector
+
+
+def get_config_path(model_name: str) -> pathlib.Path:
+    """Return the bundled config path; raise ValueError for unknown names."""
+    if model_name not in ('faster-rcnn', 'yolov3', 'hrnetv2'):
+        # Raise explicitly: a bare ``assert`` is stripped under ``python -O``.
+        raise ValueError(f'Unknown model name: {model_name!r}')
+    # hrnetv2 is the only mmpose config; the other models are mmdet configs.
+    framework = 'mmpose' if model_name == 'hrnetv2' else 'mmdet'
+    package_path = pathlib.Path(__file__).parent.resolve()
+    return package_path / 'configs' / framework / f'{model_name}.py'
diff --git a/anime_face_detector/configs/mmdet/faster-rcnn.py b/anime_face_detector/configs/mmdet/faster-rcnn.py
@@ -0,0 +1,98 @@
+# Test-time Faster R-CNN settings: ResNet-50 backbone, FPN neck, 1 class.
+model = {
+    'type': 'FasterRCNN',
+    'backbone': {
+        'type': 'ResNet',
+        'depth': 50,
+        'num_stages': 4,
+        'out_indices': (0, 1, 2, 3),
+        'frozen_stages': 1,
+        'norm_cfg': {'type': 'BN', 'requires_grad': True},
+        'norm_eval': True,
+        'style': 'pytorch',
+    },
+    'neck': {
+        'type': 'FPN',
+        'in_channels': [256, 512, 1024, 2048],
+        'out_channels': 256,
+        'num_outs': 5,
+    },
+    'rpn_head': {
+        'type': 'RPNHead',
+        'in_channels': 256,
+        'feat_channels': 256,
+        'anchor_generator': {
+            'type': 'AnchorGenerator',
+            'scales': [8],
+            'ratios': [0.5, 1.0, 2.0],
+            'strides': [4, 8, 16, 32, 64],
+        },
+        'bbox_coder': {
+            'type': 'DeltaXYWHBBoxCoder',
+            'target_means': [0.0, 0.0, 0.0, 0.0],
+            'target_stds': [1.0, 1.0, 1.0, 1.0],
+        },
+    },
+    'roi_head': {
+        'type': 'StandardRoIHead',
+        'bbox_roi_extractor': {
+            'type': 'SingleRoIExtractor',
+            'roi_layer': {
+                'type': 'RoIAlign',
+                'output_size': 7,
+                'sampling_ratio': 0,
+            },
+            'out_channels': 256,
+            'featmap_strides': [4, 8, 16, 32],
+        },
+        'bbox_head': {
+            'type': 'Shared2FCBBoxHead',
+            'in_channels': 256,
+            'fc_out_channels': 1024,
+            'roi_feat_size': 7,
+            'num_classes': 1,
+            'bbox_coder': {
+                'type': 'DeltaXYWHBBoxCoder',
+                'target_means': [0.0, 0.0, 0.0, 0.0],
+                'target_stds': [0.1, 0.1, 0.2, 0.2],
+            },
+            'reg_class_agnostic': False,
+        },
+    },
+    'test_cfg': {
+        'rpn': {
+            'nms_pre': 1000,
+            'max_per_img': 1000,
+            'nms': {'type': 'nms', 'iou_threshold': 0.7},
+            'min_bbox_size': 0,
+        },
+        'rcnn': {
+            'score_thr': 0.05,
+            'nms': {'type': 'nms', 'iou_threshold': 0.5},
+            'max_per_img': 100,
+        },
+    },
+}
+# Test-time pipeline: a single img_scale with flip set to False.
+test_pipeline = [
+    {'type': 'LoadImageFromFile'},
+    {
+        'type': 'MultiScaleFlipAug',
+        'img_scale': (1333, 800),
+        'flip': False,
+        'transforms': [
+            {'type': 'Resize', 'keep_ratio': True},
+            {'type': 'RandomFlip'},
+            {
+                'type': 'Normalize',
+                'mean': [123.675, 116.28, 103.53],
+                'std': [58.395, 57.12, 57.375],
+                'to_rgb': True,
+            },
+            {'type': 'Pad', 'size_divisor': 32},
+            {'type': 'ImageToTensor', 'keys': ['img']},
+            {'type': 'Collect', 'keys': ['img']},
+        ],
+    },
+]
+data = {'test': {'pipeline': test_pipeline}}
diff --git a/anime_face_detector/configs/mmdet/yolov3.py b/anime_face_detector/configs/mmdet/yolov3.py
@@ -0,0 +1,59 @@
+# Test-time YOLOv3 settings: Darknet-53 backbone, three scales, 1 class.
+model = {
+    'type': 'YOLOV3',
+    'backbone': {'type': 'Darknet', 'depth': 53, 'out_indices': (3, 4, 5)},
+    'neck': {
+        'type': 'YOLOV3Neck',
+        'num_scales': 3,
+        'in_channels': [1024, 512, 256],
+        'out_channels': [512, 256, 128],
+    },
+    'bbox_head': {
+        'type': 'YOLOV3Head',
+        'num_classes': 1,
+        'in_channels': [512, 256, 128],
+        'out_channels': [1024, 512, 256],
+        'anchor_generator': {
+            'type': 'YOLOAnchorGenerator',
+            'base_sizes': [
+                [(116, 90), (156, 198), (373, 326)],
+                [(30, 61), (62, 45), (59, 119)],
+                [(10, 13), (16, 30), (33, 23)],
+            ],
+            'strides': [32, 16, 8],
+        },
+        'bbox_coder': {'type': 'YOLOBBoxCoder'},
+        'featmap_strides': [32, 16, 8],
+    },
+    'test_cfg': {
+        'nms_pre': 1000,
+        'min_bbox_size': 0,
+        'score_thr': 0.05,
+        'conf_thr': 0.005,
+        'nms': {'type': 'nms', 'iou_threshold': 0.45},
+        'max_per_img': 100,
+    },
+}
+# Test-time pipeline: a single 608x608 img_scale with flip set to False.
+test_pipeline = [
+    {'type': 'LoadImageFromFile'},
+    {
+        'type': 'MultiScaleFlipAug',
+        'img_scale': (608, 608),
+        'flip': False,
+        'transforms': [
+            {'type': 'Resize', 'keep_ratio': True},
+            {'type': 'RandomFlip'},
+            {
+                'type': 'Normalize',
+                'mean': [0, 0, 0],
+                'std': [255.0, 255.0, 255.0],
+                'to_rgb': True,
+            },
+            {'type': 'Pad', 'size_divisor': 32},
+            {'type': 'ImageToTensor', 'keys': ['img']},
+            {'type': 'Collect', 'keys': ['img']},
+        ],
+    },
+]
+data = {'test': {'pipeline': test_pipeline}}
diff --git a/anime_face_detector/configs/mmpose/hrnetv2.py b/anime_face_detector/configs/mmpose/hrnetv2.py
@@ -0,0 +1,132 @@
+# Test-time mmpose top-down config: HRNet backbone, 28 heatmap keypoints.
+channel_cfg = {
+    'num_output_channels': 28,
+    'dataset_joints': 28,
+    'dataset_channel': [list(range(28))],
+    'inference_channel': list(range(28)),
+}
+
+model = {
+    'type': 'TopDown',
+    'backbone': {
+        'type': 'HRNet',
+        'in_channels': 3,
+        'extra': {
+            'stage1': {
+                'num_modules': 1,
+                'num_branches': 1,
+                'block': 'BOTTLENECK',
+                'num_blocks': (4, ),
+                'num_channels': (64, ),
+            },
+            'stage2': {
+                'num_modules': 1,
+                'num_branches': 2,
+                'block': 'BASIC',
+                'num_blocks': (4, 4),
+                'num_channels': (18, 36),
+            },
+            'stage3': {
+                'num_modules': 4,
+                'num_branches': 3,
+                'block': 'BASIC',
+                'num_blocks': (4, 4, 4),
+                'num_channels': (18, 36, 72),
+            },
+            'stage4': {
+                'num_modules': 3,
+                'num_branches': 4,
+                'block': 'BASIC',
+                'num_blocks': (4, 4, 4, 4),
+                'num_channels': (18, 36, 72, 144),
+                'multiscale_output': True,
+            },
+            'upsample': {'mode': 'bilinear', 'align_corners': False},
+        },
+    },
+    'keypoint_head': {
+        'type': 'TopdownHeatmapSimpleHead',
+        'in_channels': [18, 36, 72, 144],
+        'in_index': (0, 1, 2, 3),
+        'input_transform': 'resize_concat',
+        'out_channels': channel_cfg['num_output_channels'],
+        'num_deconv_layers': 0,
+        'extra': {
+            'final_conv_kernel': 1,
+            'num_conv_layers': 1,
+            'num_conv_kernels': (1, ),
+        },
+        'loss_keypoint': {'type': 'JointsMSELoss', 'use_target_weight': True},
+    },
+    'test_cfg': {
+        'flip_test': True,
+        'post_process': 'unbiased',
+        'shift_heatmap': True,
+        'modulate_kernel': 11,
+    },
+}
+
+data_cfg = {
+    'image_size': [256, 256],
+    'heatmap_size': [64, 64],
+    'num_output_channels': channel_cfg['num_output_channels'],
+    'num_joints': channel_cfg['dataset_joints'],
+    'dataset_channel': channel_cfg['dataset_channel'],
+    'inference_channel': channel_cfg['inference_channel'],
+}
+
+test_pipeline = [
+    {'type': 'LoadImageFromFile'},
+    {'type': 'TopDownAffine'},
+    {'type': 'ToTensor'},
+    {
+        'type': 'NormalizeTensor',
+        'mean': [0.485, 0.456, 0.406],
+        'std': [0.229, 0.224, 0.225],
+    },
+    {
+        'type': 'Collect',
+        'keys': ['img'],
+        'meta_keys': [
+            'image_file', 'center', 'scale', 'rotation', 'flip_pairs'
+        ],
+    },
+]
+
+# Keypoint metadata. Every entry has the same fields, so the table is built
+# from the one value that varies: the index of the keypoint named in 'swap',
+# or None where 'swap' is the empty string. The loop variables stay inside
+# the comprehension, so this adds no module-level names beyond the ones the
+# config already defines.
+dataset_info = {
+    'dataset_name': 'anime_face',
+    'paper_info': {},
+    'keypoint_info': {
+        idx: {
+            'name': f'kpt-{idx}',
+            'id': idx,
+            'color': [255, 255, 255],
+            'type': '',
+            'swap': '' if partner is None else f'kpt-{partner}',
+        }
+        for idx, partner in enumerate([
+            4, 3, None, 1, 0,  # keypoints 0-4
+            10, 9, 8, 7, 6, 5,  # keypoints 5-10
+            19, 18, 17, 22, 21, 20,  # keypoints 11-16
+            13, 12, 11, 16, 15, 14,  # keypoints 17-22
+            None, 26, None, 24, None,  # keypoints 23-27
+        ])
+    },
+    'skeleton_info': {},
+    'joint_weights': [1.] * 28,
+    'sigmas': [],
+}
+
+data = {
+    'test': {
+        'type': '',
+        'data_cfg': data_cfg,
+        'pipeline': test_pipeline,
+        'dataset_info': dataset_info,
+    },
+}