diff --git a/ppcls/arch/backbone/model_zoo/vqkd.py b/ppcls/arch/backbone/model_zoo/vqkd.py index 08140d4bb4..5a5598f4fc 100644 --- a/ppcls/arch/backbone/model_zoo/vqkd.py +++ b/ppcls/arch/backbone/model_zoo/vqkd.py @@ -49,7 +49,6 @@ def __init__(self, **kwargs ): super().__init__() - print(kwargs) if decoder_config['in_chans'] != embed_dim: print(f"Rewrite the in_chans in decoder from {decoder_config['in_chans']} to {embed_dim}") decoder_config['in_chans'] = embed_dim diff --git a/ppcls/configs/ImageNet/BeitV2/BeitV2_base_patch16_224_ft.yaml b/ppcls/configs/ImageNet/BeitV2/BeitV2_base_patch16_224_ft.yaml index 2b72bdf7d5..a5e231997e 100644 --- a/ppcls/configs/ImageNet/BeitV2/BeitV2_base_patch16_224_ft.yaml +++ b/ppcls/configs/ImageNet/BeitV2/BeitV2_base_patch16_224_ft.yaml @@ -17,6 +17,13 @@ Global: seed: 0 distributed: 4 +AMP: + scale_loss: 65536.0 + use_dynamic_loss_scaling: True + incr_every_n_steps: 2000 + # O1: mixed fp16 + level: O1 + # model architecture Arch: name: beit_base_patch16_224 diff --git a/ppcls/configs/ImageNet/BeitV2/BeitV2_base_patch16_224_pt.yaml b/ppcls/configs/ImageNet/BeitV2/BeitV2_base_patch16_224_pt.yaml index eacaeea976..f092c9a514 100644 --- a/ppcls/configs/ImageNet/BeitV2/BeitV2_base_patch16_224_pt.yaml +++ b/ppcls/configs/ImageNet/BeitV2/BeitV2_base_patch16_224_pt.yaml @@ -29,7 +29,7 @@ AMP: Arch: name: "Beitv2Model" drop_path_rate : 0.1 - class_num: &class_num 1000 + class_num: 1000 is_beitv2: True # if not null, its lengths should be same as models pretrained_list: @@ -42,7 +42,7 @@ Arch: - Teacher: name: vqkd_encoder_base_decoder_3x768x12_clip pretrained: True - pretrained_weight: /home/aistudio/weight/vqkd.pdparams + pretrained_weight: ./dataset/vqkd.pdparams as_tokenzer: False img_size: 224 n_code: 8192 @@ -50,7 +50,7 @@ Arch: - Student: name: beit_base_patch16_224_8k_vocab_cls_pt pretrained: True - pretrained_weight: /home/aistudio/weight/pretrain_model.pdparams + pretrained_weight: ./dataset/pretrain_model.pdparams drop_path_rate: 0.1 use_shared_rel_pos_bias: True use_abs_pos_emb: False @@ -75,14 +75,14 @@ Optimizer: beta1: 0.9 beta2: 0.98 momentum: 0.9 - weight_decay: 1e-4 - epsilon: 1e-8 + weight_decay: 0.05 #multi precision: True no_weight_decay_name: pos_embed cls_token .bias norm gamma + one_dim_param_no_weight_decay: True # Ir自定义 lr: name: Cosine - learning_rate: 0.0015 + learning_rate: 1.5e-3 T_max: 200 eta_min: 1e-5 warmup_steps: 10 @@ -94,8 +94,8 @@ DataLoader: Train: dataset: name: BEiT_ImageNet - image_root: /home/aistudio/data/data89857/ILSVRC2012mini/ - cls_label_path: /home/aistudio/data/data89857/ILSVRC2012mini/train_list.txt + image_root: ./dataset/ILSVRC2012/ + cls_label_path: ./dataset/ILSVRC2012/train_list.txt transform_ops: - DecodeImage: to_rgb: True, @@ -135,31 +135,3 @@ DataLoader: loader: num_workers: 4 use_shared_memory: True - - Eval: - dataset: - name: ImageNetDataset - image_root: /home/aistudio/data/data89857/ILSVRC2012mini/val/ - cls_label_path: /home/aistudio/data/data89857/ILSVRC2012mini/val_list.txt - transform_ops: - - DecodeImage: - to_rgb: True - channel_first: False - - ResizeImage: - resize_short: 256 - - CropImage: - size: 224 - - NormalizeImage: - scale: 1.0/255.0 - mean: [0.485, 0.456, 0.406] - std: [0.229, 0.224, 0.225] - order: '' - sampler: - name: DistributedBatchSampler - batch_size: 256 - drop_last: False - shuffle: False - loader: - num_workers: 8 - use_shared_memory: True - diff --git a/ppcls/engine/train/train.py b/ppcls/engine/train/train.py index b93cc01d82..5d638d79c2 100644 --- a/ppcls/engine/train/train.py +++ b/ppcls/engine/train/train.py @@ -70,7 +70,8 @@ def train_epoch(engine, epoch_id, print_batch_step): scaled.backward() if (iter_id + 1) % engine.update_freq == 0: for i in range(len(engine.optimizer)): - engine.scaler.minimize(engine.optimizer[i], scaled) + engine.scaler.step(engine.optimizer[i]) + engine.scaler.update() else: loss.backward() if (iter_id + 1) % engine.update_freq == 0: diff --git a/ppcls/optimizer/optimizer.py b/ppcls/optimizer/optimizer.py index 70d81efd67..31b8c01892 100644 --- a/ppcls/optimizer/optimizer.py +++ b/ppcls/optimizer/optimizer.py @@ -341,7 +341,6 @@ def _get_parameter_groups(self, model, weight_decay=0.05, skip_list=(), get_num_ parameter_group_names = {} parameter_group_vars = {} for name, param in model.named_parameters(): - print(param.stop_gradient) if param.stop_gradient: continue diff --git a/requirements.txt b/requirements.txt index b46ed61faf..1179411ed7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,4 @@ scikit-learn>=0.21.0 gast==0.3.3 faiss-cpu easydict +einops