[`core` / `DDP`] Fix RM trainer + DDP + quantization + propagate `gradient_checkpointing_kwargs` in SFT & DPO #1636

	name: Build PR Documentation

	on:
	pull_request:

	concurrency:
	group: ${{ github.workflow }}-${{ github.head_ref \|\| github.run_id }}
	cancel-in-progress: true

	jobs:
	build:
	uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
	with:
	commit_sha: ${{ github.event.pull_request.head.sha }}
	pr_number: ${{ github.event.number }}
	package: trl
	version_tag_suffix: ""

Provide feedback