diff --git a/2_preference_alignment/notebooks/dpo_finetuning_example.ipynb b/2_preference_alignment/notebooks/dpo_finetuning_example.ipynb index e432879b..58333c15 100644 --- a/2_preference_alignment/notebooks/dpo_finetuning_example.ipynb +++ b/2_preference_alignment/notebooks/dpo_finetuning_example.ipynb @@ -276,6 +276,13 @@ " use_mps_device=device == \"mps\",\n", " # Model ID for HuggingFace Hub uploads\n", " hub_model_id=finetune_name,\n", + " # DPO beta parameter: controls how far the trained policy may deviate from the reference model\n", + " # Higher values keep the model closer to the reference; lower values (like 0.1) allow more deviation\n", + " beta=0.1,\n", + " # Maximum length of the input prompt in tokens\n", + " max_prompt_length=1024,\n", + " # Maximum combined length of prompt + response in tokens\n", + " max_length=1536\n", ")" ] },