Migrate to peft from opendelta for parameter efficient tuning methods (CarperAI#434) + Collapse reference+learner hydra heads when using LoRA (CarperAI#320)
glerzing committed May 24, 2023
1 parent 355c974 commit 5abd209
Showing 15 changed files with 1,013 additions and 427 deletions.
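For context, the migration replaces opendelta adapters with the peft library. The sketch below shows the general peft pattern the new code path builds on; it is illustrative standard peft usage, not code taken from this diff, and the gpt2 base model is an arbitrary example.

from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM

# Wrap a base model with a LoRA adapter so only the low-rank adapter weights are trained.
base_model = AutoModelForCausalLM.from_pretrained("gpt2")
lora_config = LoraConfig(r=8, task_type=TaskType.CAUSAL_LM, lora_alpha=32, lora_dropout=0.1)
model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()  # reports the (small) fraction of parameters being trained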
67 changes: 67 additions & 0 deletions examples/ppo_sentiments_peft.py
@@ -0,0 +1,67 @@
# Generates positive movie reviews by tuning a pretrained model on the IMDB dataset
# with a sentiment reward function
import json
import os
import sys
from typing import List

import torch
from datasets import load_dataset
from peft import LoraConfig
from peft.utils.config import TaskType
from transformers import pipeline

import trlx
from trlx.data.default_configs import TRLConfig, default_ppo_config


def get_positive_score(scores):
    "Extract the value associated with a positive sentiment from the pipeline's output"
    return dict(map(lambda x: tuple(x.values()), scores))["POSITIVE"]


def main(hparams={}):
    # Merge sweep config with default config if given
    config = TRLConfig.update(default_ppo_config().to_dict(), hparams)

    if torch.cuda.is_available():
        device = int(os.environ.get("LOCAL_RANK", 0))
    else:
        device = -1

    sentiment_fn = pipeline(
        "sentiment-analysis",
        "lvwerra/distilbert-imdb",
        top_k=2,
        truncation=True,
        batch_size=256,
        device=device,
    )

    # Just insert your peft config here (the type must be an instance of peft.PeftConfig or a dict).
    config.model.peft_config = LoraConfig(
        r=8,
        task_type=TaskType.CAUSAL_LM,
        lora_alpha=32,
        lora_dropout=0.1,
    )

    def reward_fn(samples: List[str], **kwargs) -> List[float]:
        sentiments = list(map(get_positive_score, sentiment_fn(samples)))
        return sentiments

    # Take a few words off of movie reviews as prompts
    imdb = load_dataset("imdb", split="train+test")
    prompts = [" ".join(review.split()[:4]) for review in imdb["text"]]

    trlx.train(
        reward_fn=reward_fn,
        prompts=prompts,
        eval_prompts=["I don't know much about Hungarian underground"] * 256,
        config=config,
    )


if __name__ == "__main__":
    hparams = {} if len(sys.argv) == 1 else json.loads(sys.argv[1])
    main(hparams)
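The inline comment in the example notes that config.model.peft_config accepts either a peft.PeftConfig instance or a plain dict. A minimal sketch of the dict form follows; it assumes the keys simply mirror the LoraConfig arguments used above and that "peft_type" names the adapter type (both assumptions, not code from this commit).

from trlx.data.default_configs import TRLConfig, default_ppo_config

# Hypothetical dict-form peft config (assumption: keys mirror the LoraConfig arguments above).
config = TRLConfig.update(default_ppo_config().to_dict(), {})
config.model.peft_config = {
    "peft_type": "LORA",
    "task_type": "CAUSAL_LM",
    "r": 8,
    "lora_alpha": 32,
    "lora_dropout": 0.1,
}
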
1 change: 1 addition & 0 deletions requirements.txt
@@ -43,6 +43,7 @@ numpy==1.24.3
packaging==23.1
pandas==2.0.1
pathtools==0.1.2
peft==0.3.0
pkgutil_resolve_name==1.3.10
platformdirs==3.5.0
protobuf==4.22.3