diff --git a/README.md b/README.md
index cf01c52a..f5a8ca74 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,7 @@
 ![Illustrating the performance of the proposed BEVDet on the nuScenes val set](./resources/nds-fps.png)
 
 ## News
+* **2022.08.15** Support FP16 training for the BEVDet series with a ResNet image-view backbone.
 * **2022.07.29** Support BEVDepth.
 * **2022.07.26** Add configs and pretrained models of bevdet-r50 and bevdet4d-r50.
 * **2022.07.13** Support bev-pool proposed in [BEVFusion](https://github.com/mit-han-lab/bevfusion), which will speed up the training process of bevdet-tiny by +25%.
@@ -16,6 +17,7 @@
 
 ## Main Results
 
+#### FP32
 | Method | mAP | NDS | FPS |Mem (MB) | Model | Log
 |--------|----------|---------|--------|-------------|-----|-------|
 | [**BEVDet-R50**](configs/bevdet/bevdet-r50.py) | 29.9 | 37.7 | 16.7 | 5,007 | [google](https://drive.google.com/drive/folders/1VnJv-dNb6-gkKTq7uC_Q6YsRq_q3NI-t?usp=sharing) | [google](https://drive.google.com/drive/folders/1VnJv-dNb6-gkKTq7uC_Q6YsRq_q3NI-t?usp=sharing)
@@ -27,6 +29,13 @@
 | [**BEVDet4D-Tiny**](configs/bevdet4d/bevdet4d-sttiny.py) | 33.8 | 47.6 | 15.5 |9,255| [google](https://drive.google.com/file/d/1nyQfp7Gt-xbXDzcw5ritmFb8lvPM1H6n/view?usp=sharing) / [baidu](https://pan.baidu.com/s/1n9sVR6FnfmMccSJFTsVKfw?pwd=nzi1) | [google](https://drive.google.com/drive/folders/1VnJv-dNb6-gkKTq7uC_Q6YsRq_q3NI-t?usp=sharing) /[baidu](https://pan.baidu.com/s/1uv81CE34AhYbrz4247QcYA?pwd=k2ms)
 * *Thirdparty implementation, please refer to [Megvii](https://github.com/Megvii-BaseDetection/BEVDepth) for official implementation.
 * Memory is tested in the training process with batch 1 and without using torch.checkpoint.
+#### FP16
+| Method | mAP | NDS | FPS |Mem (MB) | Model | Log
+|--------|----------|---------|--------|-------------|-----|-------|
+| [**BEVDet-R50-FP16**](configs/bevdet/bevdet-r50-fp16.py) | 29.6 | 37.4 | 16.7 | 3,931 | [google](https://drive.google.com/drive/folders/1VnJv-dNb6-gkKTq7uC_Q6YsRq_q3NI-t?usp=sharing) | [google](https://drive.google.com/drive/folders/1VnJv-dNb6-gkKTq7uC_Q6YsRq_q3NI-t?usp=sharing)
+| [**BEVDet4D-R50-FP16**](configs/bevdet4d/bevdet4d-r50-fp16.py) | 32.7 | 46.0 | 16.7 |5,145| [google](https://drive.google.com/drive/folders/1VnJv-dNb6-gkKTq7uC_Q6YsRq_q3NI-t?usp=sharing) | [google](https://drive.google.com/drive/folders/1VnJv-dNb6-gkKTq7uC_Q6YsRq_q3NI-t?usp=sharing)
+| [**BEVDepth4D-R50-FP16***](configs/bevdepth/bevdepth4d-r50-fp16.py) | 36.4 | 48.4 | 15.7 |5,361| [google](https://drive.google.com/drive/folders/1VnJv-dNb6-gkKTq7uC_Q6YsRq_q3NI-t?usp=sharing) | [google](https://drive.google.com/drive/folders/1VnJv-dNb6-gkKTq7uC_Q6YsRq_q3NI-t?usp=sharing)
+
 ## Get Started
 #### Installation and Data Preparation
 Please see [getting_started.md](docs/getting_started.md)
@@ -39,17 +48,17 @@ python tools/analysis_tools/benchmark.py configs/bevdet/bevdet-sttiny-accelerate
 python tools/analysis_tools/benchmark.py configs/bevdet/bevdet-sttiny.py $checkpoint
 ```
 #### Estimate the flops of BEVDet
-For bevdet4d, the FLOP result involves the current frame only.
+Note: For bevdet4d, the FLOP result involves the current frame only.
 ```shell
 python tools/analysis_tools/get_flops.py configs/bevdet/bevdet-sttiny.py --shape 256 704
 python tools/analysis_tools/get_flops.py configs/bevdet4d/bevdet4d-sttiny.py --shape 256 704
 ```
 #### Visualize the predicted result with open3d.
-**Official implementation. (Visualization locally only)**
+* Official implementation. (Visualization locally only)
 ```shell
 python tools/test.py $config $checkpoint --show --show-dir $save-path
 ```
-**Private implementation. (Visualization remotely/locally)**
+* Private implementation. (Visualization remotely/locally)
 ```shell
 python tools/test.py $config $checkpoint --format-only --eval-options jsonfile_prefix=$savepath
 python tools/analysis_tools/vis.py $savepath/pts_bbox/results_nusc.json
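The three new config files below enable mixed precision purely through the `fp16 = dict(loss_scale='dynamic')` field, which the mmcv-based training script translates into an `Fp16OptimizerHook` with a dynamic loss scaler. As a minimal sketch of what dynamic loss scaling does (the class below is illustrative, not mmcv's actual implementation):

```python
import torch

class DynamicLossScaler:
    """Illustrative dynamic loss scaler, not mmcv's actual class.

    The loss is scaled up before backward() so small FP16 gradients do
    not underflow, then the gradients are unscaled before the optimizer
    step. On overflow the step is skipped and the scale is halved; after
    `growth_interval` clean steps the scale is doubled again.
    """

    def __init__(self, init_scale=2.**16, growth_interval=2000):
        self.scale = init_scale
        self.growth_interval = growth_interval
        self._good_steps = 0

    def step(self, loss, optimizer):
        (loss * self.scale).backward()
        grads = [p.grad for group in optimizer.param_groups
                 for p in group['params'] if p.grad is not None]
        if any(not torch.isfinite(g).all() for g in grads):
            optimizer.zero_grad()  # overflow: skip this update
            self.scale /= 2.0
            self._good_steps = 0
            return
        for g in grads:  # unscale before stepping
            g.div_(self.scale)
        optimizer.step()
        optimizer.zero_grad()
        self._good_steps += 1
        if self._good_steps % self.growth_interval == 0:
            self.scale *= 2.0
```

Judging by the tables above, the payoff is mainly memory: BEVDet-R50 drops from 5,007 MB to 3,931 MB while mAP/NDS move by only about 0.3 points.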
diff --git a/configs/bevdepth/bevdepth4d-r50-fp16.py b/configs/bevdepth/bevdepth4d-r50-fp16.py
new file mode 100644
index 00000000..cf586f36
--- /dev/null
+++ b/configs/bevdepth/bevdepth4d-r50-fp16.py
@@ -0,0 +1,9 @@
+# Copyright (c) Phigent Robotics. All rights reserved.
+
+_base_ = ['./bevdepth4d-r50.py']
+
+# avoid the dtype error on the bias of DCNv2 under FP16
+model = dict(
+    img_view_transformer=dict(dcn_config=dict(bias=False)))
+
+fp16 = dict(loss_scale='dynamic')
\ No newline at end of file
diff --git a/configs/bevdet/bevdet-r50-fp16.py b/configs/bevdet/bevdet-r50-fp16.py
new file mode 100644
index 00000000..ecb8ac2e
--- /dev/null
+++ b/configs/bevdet/bevdet-r50-fp16.py
@@ -0,0 +1,5 @@
+# Copyright (c) Phigent Robotics. All rights reserved.
+
+_base_ = ['./bevdet-r50.py']
+
+fp16 = dict(loss_scale='dynamic')
\ No newline at end of file
diff --git a/configs/bevdet4d/bevdet4d-r50-fp16.py b/configs/bevdet4d/bevdet4d-r50-fp16.py
new file mode 100644
index 00000000..08d1affd
--- /dev/null
+++ b/configs/bevdet4d/bevdet4d-r50-fp16.py
@@ -0,0 +1,5 @@
+# Copyright (c) Phigent Robotics. All rights reserved.
+
+_base_ = ['./bevdet4d-r50.py']
+
+fp16 = dict(loss_scale='dynamic')
\ No newline at end of file
diff --git a/mmdet3d/datasets/nuscenes_dataset.py b/mmdet3d/datasets/nuscenes_dataset.py
index f8fde8ff..3276eb74 100644
--- a/mmdet3d/datasets/nuscenes_dataset.py
+++ b/mmdet3d/datasets/nuscenes_dataset.py
@@ -307,7 +307,7 @@ def get_data_info(self, index):
         if self.img_info_prototype == 'bevdet_sequential':
             bbox = input_dict['ann_info']['gt_bboxes_3d'].tensor
             if 'abs' in self.speed_mode:
-                bbox[:, 7:9] = bbox[:, 7:9] + torch.from_numpy(info['velo']).view(1,2)
+                bbox[:, 7:9] = bbox[:, 7:9] + torch.from_numpy(info['velo']).view(1,2).to(bbox)
             if input_dict['adjacent_type'] == 'next' and not self.fix_direction:
                 bbox[:, 7:9] = -bbox[:, 7:9]
             if 'dis' in self.speed_mode:
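The one-line dataset change above is a dtype/device guard rather than a behavioral change: `torch.from_numpy` inherits numpy's dtype (float64 by default) and always yields a CPU tensor, whereas `Tensor.to(bbox)` casts the velocity offset to whatever dtype and device `bbox` uses, which matters once parts of the pipeline run in half precision. A small illustration (shapes and values are made up):

```python
import numpy as np
import torch

bbox = torch.zeros(3, 9, dtype=torch.float16)  # stand-in for gt_bboxes_3d.tensor
velo = np.array([0.5, -0.25])                  # numpy defaults to float64

# Without .to(bbox), float64 + float16 promotes the sum to float64,
# and a CPU/GPU device mismatch would raise outright.
offset = torch.from_numpy(velo).view(1, 2).to(bbox)  # float16, bbox's device
bbox[:, 7:9] = bbox[:, 7:9] + offset
assert bbox.dtype == torch.float16
```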
diff --git a/mmdet3d/models/detectors/bevdet.py b/mmdet3d/models/detectors/bevdet.py
index e82f6939..2d77bddf 100644
--- a/mmdet3d/models/detectors/bevdet.py
+++ b/mmdet3d/models/detectors/bevdet.py
@@ -1,13 +1,12 @@
 # Copyright (c) Phigent Robotics. All rights reserved.
-import os
 import torch
+from mmcv.runner import force_fp32
 import torch.nn.functional as F
 
 from mmdet.models import DETECTORS
 from .centerpoint import CenterPoint
 from .. import builder
-from mmdet3d.core import bbox3d2result
 
 
 @DETECTORS.register_module()
@@ -236,6 +235,7 @@ def __init__(self, before=False, interpolation_mode='bilinear',**kwargs):
         self.before=before
         self.interpolation_mode=interpolation_mode
 
+    @force_fp32()
     def shift_feature(self, input, trans, rots):
         n, c, h, w = input.shape
         _,v,_ =trans[0].shape
@@ -358,6 +358,7 @@ def simple_test(self, points, img_metas, img=None, rescale=False):
             result_dict['pts_bbox'] = pts_bbox
         return bbox_list
 
+    @force_fp32()
     def get_depth_loss(self, depth_gt, depth):
         B, N, H, W = depth_gt.shape
         loss_weight = (~(depth_gt == 0)).reshape(B, N, 1, H, W).expand(B, N,
diff --git a/mmdet3d/models/necks/view_transformer.py b/mmdet3d/models/necks/view_transformer.py
index c647f9ae..9f6bd733 100644
--- a/mmdet3d/models/necks/view_transformer.py
+++ b/mmdet3d/models/necks/view_transformer.py
@@ -292,7 +292,8 @@ def forward(self, x, cam_params):
 
 @NECKS.register_module()
 class ViewTransformerLSSBEVDepth(ViewTransformerLiftSplatShoot):
-    def __init__(self, extra_depth_net, loss_depth_weight, se_config=dict(), **kwargs):
+    def __init__(self, extra_depth_net, loss_depth_weight, se_config=dict(),
+                 dcn_config=dict(bias=True), **kwargs):
         super(ViewTransformerLSSBEVDepth, self).__init__(**kwargs)
         self.loss_depth_weight = loss_depth_weight
         self.extra_depthnet = builder.build_backbone(extra_depth_net)
@@ -312,7 +313,7 @@ def __init__(self, extra_depth_net, loss_depth_weight, se_config=dict(), **kwarg
                       stride=1,
                       padding=1,
                       dilation=1,
-                      bias=True),
+                      **dcn_config),
             nn.BatchNorm2d(extra_depth_net['num_channels'][0])
         ])
         self.se = SELikeModule(self.numC_input,
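Two details in the model-side changes above are worth spelling out. The `dcn_config` argument threaded into `ViewTransformerLSSBEVDepth` defaults to `dict(bias=True)`, i.e. the old behavior, and exists so that `bevdepth4d-r50-fp16.py` can pass `bias=False` and sidestep the half-precision type error on DCNv2's bias. The `@force_fp32()` decorators keep `shift_feature` and `get_depth_loss` in FP32 even when the surrounding model runs in FP16: mmcv casts the decorated method's tensor arguments back to full precision once `fp16_enabled` is set on the module. A hedged usage sketch (the module, tensors, and loss below are made up; only `force_fp32` and `wrap_fp16_model` are real mmcv APIs):

```python
import torch
from mmcv.runner import force_fp32, wrap_fp16_model

class TinyHead(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fp16_enabled = False  # flipped to True by wrap_fp16_model

    @force_fp32(apply_to=('pred', 'target'))
    def loss(self, pred, target):
        # pred/target arrive here as FP32 even if the caller passed FP16
        # tensors, avoiding half-precision blow-ups in the loss.
        return torch.nn.functional.l1_loss(pred, target)

head = TinyHead()
wrap_fp16_model(head)  # sets fp16_enabled on modules that declare it
out = head.loss(torch.randn(4, dtype=torch.half),
                torch.randn(4, dtype=torch.half))
assert out.dtype == torch.float32
```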