huggingface · lewtun · Sep 6, 2023 · Sep 1, 2023 · Sep 1, 2023 · Sep 5, 2023
diff --git a/examples/scripts/reward_trainer.py b/examples/scripts/reward_trainer.py
@@ -15,6 +15,7 @@
 from dataclasses import dataclass, field
 from typing import Optional
 
+from accelerate import Accelerator
 from datasets import load_dataset
 from peft import LoraConfig
 from tqdm import tqdm
@@ -26,11 +27,10 @@
 tqdm.pandas()
 
 
-# Define and parse arguments.
 @dataclass
 class ScriptArguments:
     """
-    The name of the Casual LM model we wish to fine with RewardTrainer
+    Hyperparameters to fine-tune a reward model on a given dataset with the `RewardTrainer`.
     """
 
     model_name: Optional[str] = field(default="facebook/opt-350m", metadata={"help": "the model name"})
@@ -48,6 +48,7 @@ class ScriptArguments:
     gradient_accumulation_steps: Optional[int] = field(
         default=16, metadata={"help": "the number of gradient accumulation steps"}
     )
+    gradient_checkpointing: Optional[bool] = field(default=True, metadata={"help": "Enable gradient checkpointing"})
     load_in_8bit: Optional[bool] = field(default=False, metadata={"help": "load the model in 8 bits precision"})
     load_in_4bit: Optional[bool] = field(default=False, metadata={"help": "load the model in 4 bits precision"})
     use_peft: Optional[bool] = field(default=False, metadata={"help": "Wether to use PEFT or not to train adapters"})
@@ -57,6 +58,7 @@ class ScriptArguments:
 
 parser = HfArgumentParser(ScriptArguments)
 script_args = parser.parse_args_into_dataclasses()[0]
+print(script_args)
 
 # Step 1: Load the model
 if script_args.load_in_8bit and script_args.load_in_4bit:
@@ -65,8 +67,8 @@ class ScriptArguments:
     quantization_config = BitsAndBytesConfig(
         load_in_8bit=script_args.load_in_8bit, load_in_4bit=script_args.load_in_4bit
     )
-    # This means: fit the entire model on the GPU:0
-    device_map = {"": 0}
+    # Load the whole model onto this process's own device (one full copy per process)
+    device_map = {"": Accelerator().local_process_index}
 else:
     device_map = None
     quantization_config = None
@@ -84,11 +86,8 @@ class ScriptArguments:
 train_dataset = load_dataset(script_args.dataset_name, split="train")
 
 
-# Turn the dataset into pairs of post + summaries, where text_j is the preferred question + answer and text_k is the other.
-# Then tokenize the dataset.
+# Tokenize chosen/rejected pairs of inputs
 # Adapt this section to your needs for custom datasets
-
-
 def preprocess_function(examples):
     new_examples = {
         "input_ids_chosen": [],
@@ -97,26 +96,27 @@ def preprocess_function(examples):
         "attention_mask_rejected": [],
     }
     for chosen, rejected in zip(examples["chosen"], examples["rejected"]):
-        tokenized_j = tokenizer(chosen, truncation=True)
-        tokenized_k = tokenizer(rejected, truncation=True)
+        tokenized_chosen = tokenizer(chosen, truncation=True)
+        tokenized_rejected = tokenizer(rejected, truncation=True)
 
-        new_examples["input_ids_chosen"].append(tokenized_j["input_ids"])
-        new_examples["attention_mask_chosen"].append(tokenized_j["attention_mask"])
-        new_examples["input_ids_rejected"].append(tokenized_k["input_ids"])
-        new_examples["attention_mask_rejected"].append(tokenized_k["attention_mask"])
+        new_examples["input_ids_chosen"].append(tokenized_chosen["input_ids"])
+        new_examples["attention_mask_chosen"].append(tokenized_chosen["attention_mask"])
+        new_examples["input_ids_rejected"].append(tokenized_rejected["input_ids"])
+        new_examples["attention_mask_rejected"].append(tokenized_rejected["attention_mask"])
 
     return new_examples
 
 
-# preprocess the dataset and filter out QAs that are longer than script_args.max_length
+# Preprocess the dataset and filter out examples that are longer than script_args.seq_length
 train_dataset = train_dataset.map(
     preprocess_function,
     batched=True,
     num_proc=4,
 )
 train_dataset = train_dataset.filter(
     lambda x: len(x["input_ids_chosen"]) <= script_args.seq_length
-    and len(x["input_ids_rejected"]) <= script_args.seq_length
+    and len(x["input_ids_rejected"]) <= script_args.seq_length,
+    num_proc=4,
 )
 
 if script_args.eval_split == "none":
@@ -131,7 +131,8 @@ def preprocess_function(examples):
     )
     eval_dataset = eval_dataset.filter(
         lambda x: len(x["input_ids_chosen"]) <= script_args.seq_length
-        and len(x["input_ids_rejected"]) <= script_args.seq_length
+        and len(x["input_ids_rejected"]) <= script_args.seq_length,
+        num_proc=4,
     )
 
 
@@ -141,6 +142,7 @@ def preprocess_function(examples):
     per_device_train_batch_size=script_args.batch_size,
     num_train_epochs=script_args.num_train_epochs,
     gradient_accumulation_steps=script_args.gradient_accumulation_steps,
+    gradient_checkpointing=script_args.gradient_checkpointing,
     learning_rate=script_args.learning_rate,
     report_to="wandb" if script_args.log_with == "wandb" else "tensorboard",
     remove_unused_columns=False,

diff --git a/examples/scripts/sentiment_tuning.py b/examples/scripts/sentiment_tuning.py
@@ -16,6 +16,7 @@
 from typing import Optional
 
 import torch
+from accelerate import Accelerator
 from datasets import load_dataset
 from peft import LoraConfig
 from tqdm import tqdm
@@ -149,7 +150,8 @@ def collator(data):
         task_type="CAUSAL_LM",
     )
     ref_model = None
-    device_map = {"": 0}
+    # Load the whole model onto this process's own device (one full copy per process)
+    device_map = {"": Accelerator().local_process_index}
 
 model = trl_model_class.from_pretrained(
     config.model_name,

diff --git a/examples/scripts/sft_trainer.py b/examples/scripts/sft_trainer.py
@@ -16,6 +16,7 @@
 from typing import Optional
 
 import torch
+from accelerate import Accelerator
 from datasets import load_dataset
 from peft import LoraConfig
 from tqdm import tqdm
@@ -75,8 +76,8 @@ class ScriptArguments:
     quantization_config = BitsAndBytesConfig(
         load_in_8bit=script_args.load_in_8bit, load_in_4bit=script_args.load_in_4bit
     )
-    # This means: fit the entire model on the GPU:0
-    device_map = {"": 0}
+    # Load the whole model onto this process's own device (one full copy per process)
+    device_map = {"": Accelerator().local_process_index}
     torch_dtype = torch.bfloat16
 else:
     device_map = None

diff --git a/trl/trainer/reward_trainer.py b/trl/trainer/reward_trainer.py
@@ -113,7 +113,7 @@ def __init__(
             )
         elif is_peft_available() and peft_config is not None:
             if getattr(model, "is_loaded_in_8bit", False) or getattr(model, "is_quantized", False):
-                model = prepare_model_for_int8_training(model)
+                model = prepare_model_for_int8_training(model, use_gradient_checkpointing=args.gradient_checkpointing)
 
             model = get_peft_model(model, peft_config)