Skip to content

Commit

Permalink
Merge pull request #6 from for-ai/feat/rewardbench
Browse files Browse the repository at this point in the history
Add RewardBench script
  • Loading branch information
sanggusti authored Jul 8, 2024
2 parents 6863757 + 089518a commit 219fad6
Show file tree
Hide file tree
Showing 5 changed files with 225 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,8 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

data/*
output/*
configs/*.yml
.DS_Store
14 changes: 14 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Runtime image for the RewardBench evaluation scripts (CUDA 12.1 PyTorch base).
FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime

# Force a UTF-8 locale so Python text I/O behaves predictably in the container.
ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8

WORKDIR /stage

# Install dependencies; remove the apt lists in the SAME layer so the
# package index never bloats the final image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git \
    && rm -rf /var/lib/apt/lists/*

# Copy only requirements first so this expensive layer stays cached
# when application code changes; --no-cache-dir keeps pip's wheel cache
# out of the image.
COPY requirements.txt /stage
RUN pip install --no-cache-dir -r requirements.txt

# Copy all files
COPY . /stage
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
rewardbench
datasets
protobuf
77 changes: 77 additions & 0 deletions scripts/convert_multilingual_uf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""Convert multilingual ultrafeedback into a format acceptable for RewardBench
We need to follow the load_preference_dataset setup in RewardBench as
shown here: https://github.com/allenai/reward-bench/blob/main/rewardbench/utils.py#L136
So we need three columns:
- prompt (str)
- chosen (list[dict[str, str]]), and
- rejected (list[dict[str, str]])
"""

import argparse
import logging
from pathlib import Path

from datasets import load_dataset

# Emit INFO-level messages (e.g. the final "Saved file to ..." confirmation).
logging.basicConfig(level=logging.INFO)


def get_args():
    """Parse command-line arguments for the conversion script.

    Returns:
        argparse.Namespace with fields:
            dataset (str): HuggingFace dataset name to convert.
            output_path (Path): where the converted JSON file is written.
            en (bool): if True, read the ``en_``-prefixed (English) columns.
    """
    parser = argparse.ArgumentParser(
        description="Convert a HuggingFace dataset into the RewardBench format."
    )

    # fmt: off
    parser.add_argument("--dataset", type=str, default="nthakur/multilingual-ultrafeedback-dpo-v0.1", help="Dataset to convert.")
    # Use an explicit Path default rather than a string: argparse does run
    # string defaults through `type=`, but an actual Path is unambiguous.
    parser.add_argument("--output_path", type=Path, default=Path("data/multilingual-ultrafeedback-dpo-v0.1.json"), help="Path to save converted dataset as JSON file.")
    parser.add_argument("--en", action="store_true", help="Use the english columns.")
    # fmt: on

    return parser.parse_args()


def main():
    """Convert the dataset into RewardBench's preference format and save JSON.

    Loads the ``test`` split, selects the (optionally ``en_``-prefixed)
    prompt/chosen/rejected columns, rewrites chosen/rejected into two-turn
    chat transcripts, and writes the result to ``args.output_path``.
    """
    args = get_args()
    if args.output_path:
        # Ensure the parent directory exists before `to_json` tries to write.
        args.output_path.parent.mkdir(parents=True, exist_ok=True)

    dataset = load_dataset(args.dataset, split="test")

    def _convert_to_turn_based(example):
        # RewardBench expects `chosen`/`rejected` as a list of chat turns:
        # a user turn holding the prompt, then the assistant completion.
        example["chosen"] = [
            {"content": example["prompt"], "role": "user"},
            {"content": example["chosen_raw"], "role": "assistant"},
        ]
        example["rejected"] = [
            {"content": example["prompt"], "role": "user"},
            {"content": example["rejected_raw"], "role": "assistant"},
        ]
        return example

    # The source dataset stores English copies under "en_"-prefixed columns.
    prefix = "en_" if args.en else ""
    cols = [
        "id",
        "source",
        "language",
        f"{prefix}input",
        f"{prefix}chosen",
        f"{prefix}rejected",
    ]
    rename_map = {
        f"{prefix}input": "prompt",
        f"{prefix}chosen": "chosen_raw",
        f"{prefix}rejected": "rejected_raw",
    }
    dataset = (
        dataset.select_columns(cols)
        .rename_columns(rename_map)
        .map(_convert_to_turn_based)
        .remove_columns(["chosen_raw", "rejected_raw"])
    )
    dataset.to_json(args.output_path)
    # Lazy %-formatting: the message is only built if INFO is enabled.
    logging.info("Saved file to %s.", args.output_path)


if __name__ == "__main__":
    main()
126 changes: 126 additions & 0 deletions scripts/run_rewardbench.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#!/bin/bash
# Evaluate a suite of reward models on a preference dataset with RewardBench.

export TRANSFORMERS_CACHE="./cache/"
export CUDA_DEVICE_ORDER="PCI_BUS_ID"
export NCCL_P2P_DISABLE=1

# Function to display usage information
usage() {
    echo "Usage: $0 [DATASET] [SPLIT] [OUTDIR]"
    echo "  DATASET - The dataset to use (optional, default is 'ljvmiranda921/ultrafeedback-multilingual-dpo-test')"
    echo "  SPLIT - The data split to use (optional, default is 'test')"
    echo "  OUTDIR - The output directory (optional, default is 'output/')"
    exit 1
}

# Default values for arguments
DATASET="ljvmiranda921/ultrafeedback-multilingual-dpo-test"
SPLIT="test"
OUTDIR="output/"

# Check and assign arguments if provided
if [ $# -gt 3 ]; then
    echo "Error: Too many arguments."
    usage
elif [ $# -ge 1 ]; then
    DATASET="$1"
fi

if [ $# -ge 2 ]; then
    SPLIT="$2"
fi

if [ $# -ge 3 ]; then
    OUTDIR="$3"
fi

# Run one RewardBench evaluation; per-model flags (--model, --tokenizer,
# --chat_template, --batch_size) are passed through "$@", shared flags are
# appended here. All expansions quoted so names with spaces don't word-split.
run_rb() {
    rewardbench \
        "$@" \
        --dataset "$DATASET" \
        --split "$SPLIT" \
        --output_dir "$OUTDIR" \
        --trust_remote_code \
        --force_truncation \
        --save_all
}

run_rb --model openbmb/UltraRM-13b --chat_template openbmb --batch_size 8

run_rb --model OpenAssistant/oasst-rm-2.1-pythia-1.4b-epoch-2.5 --chat_template oasst_pythia --batch_size 8

run_rb --model OpenAssistant/oasst-rm-2-pythia-6.9b-epoch-1 --chat_template oasst_pythia --batch_size 16

run_rb --model OpenAssistant/reward-model-deberta-v3-large-v2 --chat_template raw --batch_size 64

run_rb --model berkeley-nest/Starling-RM-7B-alpha --tokenizer meta-llama/Llama-2-7b-chat-hf --chat_template llama-2 --batch_size 16

# No --chat_template here on purpose: this model uses its tokenizer's built-in template.
run_rb --model sfairXC/FsfairX-LLaMA3-RM-v0.1 --tokenizer sfairXC/FsfairX-LLaMA3-RM-v0.1 --batch_size 4

run_rb --model openbmb/Eurus-RM-7b --tokenizer openbmb/Eurus-RM-7b --chat_template mistral --batch_size 16

run_rb --model allenai/tulu-v2.5-13b-preference-mix-rm --tokenizer allenai/tulu-v2.5-13b-preference-mix-rm --chat_template mistral --batch_size 4

0 comments on commit 219fad6

Please sign in to comment.