From df2dc6832ba994da2832a7ea7d897eadd0f5912c Mon Sep 17 00:00:00 2001
From: Knight7561
Date: Wed, 18 Dec 2024 16:34:43 -0500
Subject: [PATCH] Fixing wrong args passed to DPOTrainer

---
 .../notebooks/dpo_finetuning_example.ipynb    | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/2_preference_alignment/notebooks/dpo_finetuning_example.ipynb b/2_preference_alignment/notebooks/dpo_finetuning_example.ipynb
index be6116ad..a18d39e1 100644
--- a/2_preference_alignment/notebooks/dpo_finetuning_example.ipynb
+++ b/2_preference_alignment/notebooks/dpo_finetuning_example.ipynb
@@ -276,6 +276,13 @@
     "    use_mps_device=device == \"mps\",\n",
     "    # Model ID for HuggingFace Hub uploads\n",
     "    hub_model_id=finetune_name,\n",
+    "    # DPO-specific temperature parameter that controls the strength of the preference model\n",
+    "    # Lower values (like 0.1) make the model more conservative in following preferences\n",
+    "    beta=0.1,\n",
+    "    # Maximum length of the input prompt in tokens\n",
+    "    max_prompt_length=1024,\n",
+    "    # Maximum combined length of prompt + response in tokens\n",
+    "    max_length=1536\n",
     ")"
    ]
   },
@@ -294,13 +301,6 @@
     "    train_dataset=dataset,\n",
     "    # Tokenizer for processing inputs\n",
     "    processing_class=tokenizer,\n",
-    "    # DPO-specific temperature parameter that controls the strength of the preference model\n",
-    "    # Lower values (like 0.1) make the model more conservative in following preferences\n",
-    "    beta=0.1,\n",
-    "    # Maximum length of the input prompt in tokens\n",
-    "    max_prompt_length=1024,\n",
-    "    # Maximum combined length of prompt + response in tokens\n",
-    "    max_length=1536,\n",
     ")"
    ]
   },
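
For reference, a minimal sketch of the call pattern this patch produces, assuming a recent trl release where DPOConfig carries beta, max_prompt_length, and max_length, and DPOTrainer accepts processing_class; model, dataset, tokenizer, device, and finetune_name stand in for the objects the notebook defines earlier, and the output_dir value is a hypothetical placeholder:

    from trl import DPOConfig, DPOTrainer

    # Hyperparameters that were previously passed to DPOTrainer now live on DPOConfig.
    training_args = DPOConfig(
        output_dir="./dpo_output",      # hypothetical path; the notebook sets its own
        use_mps_device=device == "mps",
        hub_model_id=finetune_name,
        # DPO-specific temperature controlling the strength of the preference model;
        # lower values (like 0.1) follow preferences more conservatively
        beta=0.1,
        max_prompt_length=1024,  # maximum prompt length in tokens
        max_length=1536,         # maximum prompt + response length in tokens
    )

    trainer = DPOTrainer(
        model=model,                 # model and dataset as defined earlier in the notebook
        args=training_args,
        train_dataset=dataset,
        processing_class=tokenizer,  # tokenizer used to process inputs
    )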