Cuda out of memory! #16

Closed
sunset233 opened this issue Jul 6, 2021 · 1 comment

Comments

@sunset233

I am reproducing the paper and an error occurred. When I run nvidia-smi, my GPU shows 14% usage. What should I do to solve the problem?
I would appreciate it if you could help me.
The details of the problem are as follows:

(lph) g303@g303:~/lph/AlignPS-master$ /bin/bash /home/g303/lph/AlignPS-master/run_train.sh
2021-07-06 16:59:41,544 - mmdet - INFO - Environment info:

sys.platform: linux
Python: 3.8.10 (default, Jun 4 2021, 15:09:15) [GCC 7.5.0]
CUDA available: True
CUDA_HOME: /usr/local/cuda
NVCC: Build cuda_11.3.r11.3/compiler.29920130_0
GPU 0: NVIDIA GeForce RTX 2070 SUPER
GCC: gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
PyTorch: 1.7.0+cu110
PyTorch compiling details: PyTorch built with:

  • GCC 7.3
  • C++ Version: 201402
  • Intel(R) oneAPI Math Kernel Library Version 2021.2-Product Build 20210312 for Intel(R) 64 architecture applications
  • Intel(R) MKL-DNN v1.6.0 (Git Hash 5ef631a030a6f73131c77892041042805a06064f)
  • OpenMP 201511 (a.k.a. OpenMP 4.5)
  • NNPACK is enabled
  • CPU capability usage: AVX2
  • CUDA Runtime 11.0
  • NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80
  • CuDNN 8.0.4
  • Magma 2.5.2
  • Build settings: BLAS=MKL, BUILD_TYPE=Release, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DUSE_VULKAN_WRAPPER -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, USE_CUDA=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON,

TorchVision: 0.8.1+cu110
OpenCV: 4.5.2
MMCV: 1.1.5
MMDetection: 2.4.0+unknown
MMDetection Compiler: GCC 7.5
MMDetection CUDA Compiler: 11.3

2021-07-06 16:59:42,319 - mmdet - INFO - Distributed training: False
2021-07-06 16:59:43,098 - mmdet - INFO - Config:
dataset_type = 'CuhkDataset'
data_root = '/home/g303/lph/datasets/PRW-v16.04.20/'
img_norm_cfg = dict(
    mean=[103.53, 116.28, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(667, 400), (1000, 600), (1333, 800), (1500, 900),
                   (1666, 1000), (2000, 1200)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='Normalize',
        mean=[103.53, 116.28, 123.675],
        std=[1.0, 1.0, 1.0],
        to_rgb=False),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_ids'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1500, 900),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[103.53, 116.28, 123.675],
                std=[1.0, 1.0, 1.0],
                to_rgb=False),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type='CuhkDataset',
        ann_file='/home/g303/lph/datasets/PRW-v16.04.20/train_pid.json',
        img_prefix='/home/g303/lph/datasets/PRW-v16.04.20/frames/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(
                type='Resize',
                img_scale=[(667, 400), (1000, 600), (1333, 800), (1500, 900),
                           (1666, 1000), (2000, 1200)],
                multiscale_mode='value',
                keep_ratio=True),
            dict(type='RandomFlip', flip_ratio=0.5),
            dict(
                type='Normalize',
                mean=[103.53, 116.28, 123.675],
                std=[1.0, 1.0, 1.0],
                to_rgb=False),
            dict(type='Pad', size_divisor=32),
            dict(type='DefaultFormatBundle'),
            dict(
                type='Collect',
                keys=['img', 'gt_bboxes', 'gt_labels', 'gt_ids'])
        ]),
    val=dict(
        type='CuhkDataset',
        ann_file='/home/g303/lph/datasets/PRW-v16.04.20/test_pid.json',
        img_prefix='/home/g303/lph/datasets/PRW-v16.04.20/frames/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1500, 900),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[103.53, 116.28, 123.675],
                        std=[1.0, 1.0, 1.0],
                        to_rgb=False),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='CuhkDataset',
        ann_file='/home/g303/lph/datasets/PRW-v16.04.20/test_pid.json',
        img_prefix='/home/g303/lph/datasets/PRW-v16.04.20/frames/',
        proposal_file=
        '/home/g303/lph/datasets/PRW-v16.04.20/annotation/test/train_test/TestG50.mat',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1500, 900),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[103.53, 116.28, 123.675],
                        std=[1.0, 1.0, 1.0],
                        to_rgb=False),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]))
evaluation = dict(interval=1, metric='bbox')
optimizer = dict(
    type='SGD',
    lr=0.001,
    momentum=0.9,
    weight_decay=0.001,
    paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0))
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.3333333333333333,
    step=[16, 22])
total_epochs = 24
checkpoint_config = dict(interval=1)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
model = dict(
    type='FCOSReid',
    pretrained='open-mmlab://detectron2/resnet50_caffe',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='caffe'),
    neck=dict(
        type='FPNDcnLconv3Dcn',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs=True,
        extra_convs_on_inputs=False,
        num_outs=5,
        relu_before_extra_convs=True),
    bbox_head=dict(
        type='FCOSReidHeadFocalOimSub',
        num_classes=1,
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        strides=[8, 16, 32, 64, 128],
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=1.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        unlabel_weight=10,
        temperature=15,
        label_norm=True,
        num_person=483,
        queue_size=500,
        norm_on_bbox=True,
        centerness_on_reg=True,
        dcn_on_last_conv=True,
        center_sampling=True,
        conv_bias=True))
train_cfg = dict(
    assigner=dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.5,
        neg_iou_thr=0.4,
        min_pos_iou=0,
        ignore_iof_thr=-1),
    allowed_border=-1,
    pos_weight=-1,
    debug=False)
test_cfg = dict(
    nms_pre=1000,
    min_bbox_size=0,
    score_thr=0.05,
    nms=dict(type='nms', iou_threshold=0.5),
    max_per_img=100)
work_dir = './work_dirs/prw_base_focal_labelnorm_sub_ldcn_fg15_wd1-3'
gpu_ids = [0]

/home/g303/anaconda3/envs/lph/lib/python3.8/site-packages/mmcv/cnn/bricks/conv_module.py:100: UserWarning: ConvModule has norm and bias at the same time
warnings.warn('ConvModule has norm and bias at the same time')
2021-07-06 16:59:43,348 - mmdet - INFO - load model from: open-mmlab://detectron2/resnet50_caffe
2021-07-06 16:59:43,409 - mmdet - WARNING - The model and loaded state dict do not match exactly

unexpected key in source state_dict: conv1.bias

loading annotations into memory...
Done (t=0.03s)
creating index...
index created!
2021-07-06 16:59:45,482 - mmdet - INFO - Start running, host: g303@g303, work_dir: /home/g303/lph/AlignPS-master/work_dirs/prw_base_focal_labelnorm_sub_ldcn_fg15_wd1-3
2021-07-06 16:59:45,482 - mmdet - INFO - workflow: [('train', 1)], max: 24 epochs
/home/g303/anaconda3/envs/lph/lib/python3.8/site-packages/torch/nn/functional.py:2952: UserWarning: nn.functional.upsample is deprecated. Use nn.functional.interpolate instead.
warnings.warn("nn.functional.upsample is deprecated. Use nn.functional.interpolate instead.")
/home/g303/anaconda3/envs/lph/lib/python3.8/site-packages/torch/nn/functional.py:3060: UserWarning: Default upsampling behavior when mode=bilinear is changed to align_corners=False since 0.4.0. Please specify align_corners=True if the old behavior is desired. See the documentation of nn.Upsample for details.
warnings.warn("Default upsampling behavior when mode={} is changed "
/home/g303/lph/AlignPS-master/mmdet/models/dense_heads/fcos_reid_head_focal_oim_sub.py:316: UserWarning: This overload of nonzero is deprecated:
nonzero()
Consider using one of the following signatures instead:
nonzero(*, bool as_tuple) (Triggered internally at /pytorch/torch/csrc/utils/python_arg_parser.cpp:882.)
pos_inds = ((flatten_labels >= 0)
Traceback (most recent call last):
File "tools/train.py", line 177, in
main()
File "tools/train.py", line 166, in main
train_detector(
File "/home/g303/lph/AlignPS-master/mmdet/apis/train.py", line 147, in train_detector
runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
File "/home/g303/anaconda3/envs/lph/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 125, in run
epoch_runner(data_loaders[i], **kwargs)
File "/home/g303/anaconda3/envs/lph/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 50, in train
self.run_iter(data_batch, train_mode=True)
File "/home/g303/anaconda3/envs/lph/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 29, in run_iter
outputs = self.model.train_step(data_batch, self.optimizer,
File "/home/g303/anaconda3/envs/lph/lib/python3.8/site-packages/mmcv/parallel/data_parallel.py", line 67, in train_step
return self.module.train_step(*inputs[0], **kwargs[0])
File "/home/g303/lph/AlignPS-master/mmdet/models/detectors/base.py", line 234, in train_step
losses = self(**data)
File "/home/g303/anaconda3/envs/lph/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/g303/lph/AlignPS-master/mmdet/core/fp16/decorators.py", line 51, in new_func
return old_func(*args, **kwargs)
File "/home/g303/lph/AlignPS-master/mmdet/models/detectors/base.py", line 168, in forward
return self.forward_train(img, img_metas, **kwargs)
File "/home/g303/lph/AlignPS-master/mmdet/models/detectors/single_stage_reid.py", line 94, in forward_train
x = self.extract_feat(img)
File "/home/g303/lph/AlignPS-master/mmdet/models/detectors/single_stage_reid.py", line 56, in extract_feat
x = self.neck(x)
File "/home/g303/anaconda3/envs/lph/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/g303/lph/AlignPS-master/mmdet/core/fp16/decorators.py", line 51, in new_func
return old_func(*args, **kwargs)
File "/home/g303/lph/AlignPS-master/mmdet/models/necks/fpn_dcn_lconv3_dcn.py", line 203, in forward
outs = [
File "/home/g303/lph/AlignPS-master/mmdet/models/necks/fpn_dcn_lconv3_dcn.py", line 204, in
self.fpn_convsi for i in range(used_backbone_levels)
File "/home/g303/anaconda3/envs/lph/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/g303/anaconda3/envs/lph/lib/python3.8/site-packages/mmcv/ops/deform_conv.py", line 288, in forward
return deform_conv2d(x, offset, self.weight, self.stride, self.padding,
File "/home/g303/anaconda3/envs/lph/lib/python3.8/site-packages/mmcv/ops/deform_conv.py", line 73, in forward
ext_module.deform_conv_forward(
RuntimeError: CUDA out of memory. Tried to allocate 2.01 GiB (GPU 0; 7.79 GiB total capacity; 4.92 GiB already allocated; 164.56 MiB free; 6.21 GiB reserved in total by PyTorch)
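
For anyone comparing numbers: the 14% that nvidia-smi reports (if that is the GPU-Util column) measures compute activity, not free memory, and the traceback above shows only ~165 MiB free at the moment the 2.01 GiB allocation is attempted. A minimal sketch for checking what PyTorch itself sees on GPU 0, using plain torch.cuda calls (this is not AlignPS code):

import torch

# Total memory on the card (the RTX 2070 SUPER shows up as ~7.79 GiB above).
props = torch.cuda.get_device_properties(0)
print(f'total:     {props.total_memory / 1024**3:.2f} GiB')

# Memory held by live tensors vs. memory cached by PyTorch's allocator.
print(f'allocated: {torch.cuda.memory_allocated(0) / 1024**3:.2f} GiB')
print(f'reserved:  {torch.cuda.memory_reserved(0) / 1024**3:.2f} GiB')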

@daodaofr
Owner

daodaofr commented Jul 6, 2021

Please try to use a larger GPU, or try distributed training.

Please refer to #4.
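
If a larger GPU is not available, another option is to shrink the memory footprint of the config shown in the log. A minimal sketch, assuming the original config file can be inherited through MMDetection's _base_ mechanism; the path below is a placeholder and these are not the paper's settings, so accuracy may differ:

# Hypothetical low-memory override (a sketch, not the official AlignPS settings).
# Replace the placeholder path with the actual AlignPS PRW config file.
_base_ = './original_prw_config.py'

data = dict(
    samples_per_gpu=2,  # was 4; roughly halves per-step activation memory
    workers_per_gpu=2)

# Optionally also trim the largest multi-scale training sizes in train_pipeline,
# e.g. keep img_scale=[(667, 400), (1000, 600), (1333, 800)].

For distributed training, assuming this fork keeps MMDetection's standard launcher script, the usual invocation is bash tools/dist_train.sh <config file> <number of GPUs>.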

@daodaofr daodaofr closed this as completed Jul 6, 2021