diff --git a/docs/config.qmd b/docs/config.qmd
index caa9b7649f..dadc5c487c 100644
--- a/docs/config.qmd
+++ b/docs/config.qmd
@@ -268,6 +268,7 @@ torch_compile_backend: # Optional[str]
 # If greater than 1, backpropagation will be skipped and the gradients will be accumulated for the given number of steps.
gradient_accumulation_steps: 1
 # The number of samples to include in each batch. This is the number of samples sent to each GPU.
+# Batch size per GPU = micro_batch_size * gradient_accumulation_steps
 micro_batch_size: 2
 eval_batch_size:
 num_epochs: 4
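
As a quick illustration of the relationship documented above (the values below are arbitrary examples, not recommended defaults):

```yaml
micro_batch_size: 2              # samples sent to each GPU per forward/backward pass
gradient_accumulation_steps: 4   # gradients accumulated before each optimizer step
# Effective batch size per GPU = 2 * 4 = 8 samples per optimizer step
```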