Skip to content

Commit

Permalink
support configuring tokenizer padding
Browse files Browse the repository at this point in the history
  • Loading branch information
SeanLee97 committed Aug 25, 2024
1 parent 4224d13 commit 8af4d8a
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions angle_emb/angle_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,6 @@
                    help='Specify logging_steps, default 100')
parser.add_argument('--pooling_strategy', type=str, default='cls',
help='Specify pooling_strategy from [`cls`, `last`, `avg`, `cls_avg`, `max`], default `cls`')
parser.add_argument('--tokenizer_padding_side', type=str, default=None, choices=['left', 'right'],
                    help='Specify tokenizer padding side from [`left`, `right`], default None')
parser.add_argument('--epochs', type=int, default=10, help='Specify epochs, default 10')
parser.add_argument('--max_steps', type=int, default=-1,
help='Specify max steps, default -1 (Automatically calculated from epochs)')
Expand All @@ -101,6 +99,11 @@
help='Specify hub_private_repo, default 1')
parser.add_argument('--hub_model_id', type=str, default=None,
help='Specify hub_model_id, default None, format like organization/model_id')
# configure tokenizer
parser.add_argument('--tokenizer_padding', type=str, default="longest", choices=['longest', 'max_length'],
help='Specify tokenizer padding from [`longest`, `max_length`], default `longest`')
parser.add_argument('--tokenizer_padding_side', type=str, default=None, choices=['left', 'right'],
                    help='Specify tokenizer padding side from [`left`, `right`], default None')
# configure LLM
parser.add_argument('--is_llm', type=int, default=0, choices=[0, 1],
                    help='Specify is_llm, choices [0, 1], default 0')
Expand Down Expand Up @@ -272,6 +275,7 @@ def main():
apply_ese=args.apply_ese,
trainer_kwargs=trainer_kwargs,
coword_random_mask_rate=args.coword_random_mask_rate,
padding=args.tokenizer_padding,
)


Expand Down

0 comments on commit 8af4d8a

Please sign in to comment.