Skip to content

Commit

Permalink
added t5 multipack
Browse files Browse the repository at this point in the history
  • Loading branch information
huseinzol05 committed Oct 15, 2024
1 parent 07b82b1 commit 44e087e
Show file tree
Hide file tree
Showing 3 changed files with 648 additions and 0 deletions.
25 changes: 25 additions & 0 deletions session/translation/end-to-end/nanot5-base-multipack.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Launch the `run_t5_multipack` module under torchrun, starting from the
# model mesolitica/nanot5-base-malaysian-translation-v2 and writing to
# nanot5-base-malaysian-cased-translation-v4-packing.
#
# WANDB_PROJECT is given as a command-prefix assignment, so it is placed in
# the environment of this torchrun invocation only.
# torchrun is invoked through an absolute path (/root/app/venv/bin/torchrun)
# with --nproc_per_node 1.
#
# Review notes (not verifiable from this script alone):
#   - The flags look like Hugging Face Trainer arguments -- TODO confirm
#     against run_t5_multipack's argument parser.
#   - Only --do_train is passed; --eval_steps 1000000000 presumably keeps
#     evaluation from ever triggering -- confirm.
#   - per_device_train_batch_size=1 with gradient_accumulation_steps=16 is
#     presumably an effective batch of 16 per process -- confirm.
#   - max_source_length / max_target_length are both 2048, which matches the
#     "2048" suffix of the --train_file name; presumably the packing length.
#   - NOTE(review): WANDB_PROJECT ends in "-multipack" while --output_dir
#     ends in "-packing" -- confirm the naming difference is intended.
#
# Keep the command below as one unbroken run of backslash-continued lines: a
# comment or blank line inserted between them would terminate the command
# early and the remaining flags would be run as a separate command.
WANDB_PROJECT="nanot5-base-malaysian-cased-translation-v4-multipack" \
/root/app/venv/bin/torchrun \
--nproc_per_node 1 \
-m run_t5_multipack \
--model_name_or_path mesolitica/nanot5-base-malaysian-translation-v2 \
--num_train_epochs 2 \
--eval_steps 1000000000 \
--logging_steps 2 \
--save_steps 200 \
--save_total_limit 3 \
--do_train \
--train_file malaysian-translation-v2-multipack-2048 \
--output_dir nanot5-base-malaysian-cased-translation-v4-packing \
--dataloader_num_workers=5 \
--per_device_train_batch_size=1 \
--per_device_eval_batch_size=3 \
--gradient_accumulation_steps=16 \
--max_source_length 2048 \
--max_target_length 2048 \
--learning_rate 2e-5 \
--gradient_checkpointing false \
--weight_decay 0.01 \
--bf16 \
--ddp_find_unused_parameters true \
--dataloader_pin_memory false
24 changes: 24 additions & 0 deletions session/translation/end-to-end/nanot5-small-multipack.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Launch the `run_t5_multipack` module under torchrun, starting from the
# model mesolitica/nanot5-small-malaysian-translation-v2 and writing to
# nanot5-small-malaysian-cased-translation-v4-packing.
#
# WANDB_PROJECT is given as a command-prefix assignment, so it is placed in
# the environment of this torchrun invocation only.
# torchrun is resolved through PATH and run with --nproc_per_node 1.
#
# Review notes (not verifiable from this script alone):
#   - The flags look like Hugging Face Trainer arguments -- TODO confirm
#     against run_t5_multipack's argument parser.
#   - Only --do_train is passed; --eval_steps 1000000000 presumably keeps
#     evaluation from ever triggering -- confirm.
#   - per_device_train_batch_size=1 with gradient_accumulation_steps=16 is
#     presumably an effective batch of 16 per process -- confirm.
#   - NOTE(review): --train_file is an absolute path under /home/husein, so
#     the script only works on a machine that has that directory.
#   - NOTE(review): unlike the base-model script in the same commit, this one
#     does not pass --dataloader_pin_memory false and does not use the
#     /root/app/venv torchrun -- confirm both differences are intended.
#
# Keep the command below as one unbroken run of backslash-continued lines: a
# comment or blank line inserted between them would terminate the command
# early and the remaining flags would be run as a separate command.
WANDB_PROJECT="nanot5-small-malaysian-cased-translation-v4-multipack" \
torchrun \
--nproc_per_node 1 \
-m run_t5_multipack \
--model_name_or_path mesolitica/nanot5-small-malaysian-translation-v2 \
--num_train_epochs 2 \
--eval_steps 1000000000 \
--logging_steps 2 \
--save_steps 200 \
--save_total_limit 3 \
--do_train \
--train_file /home/husein/mesolitica/t5-sdpa-multipack/packing \
--output_dir nanot5-small-malaysian-cased-translation-v4-packing \
--dataloader_num_workers=5 \
--per_device_train_batch_size=1 \
--per_device_eval_batch_size=3 \
--gradient_accumulation_steps=16 \
--max_source_length 2048 \
--max_target_length 2048 \
--learning_rate 2e-5 \
--gradient_checkpointing false \
--weight_decay 0.01 \
--bf16 \
--ddp_find_unused_parameters true
Loading

0 comments on commit 44e087e

Please sign in to comment.