Lit gpt merge #4

Open · wants to merge 4 commits into main
64 changes: 64 additions & 0 deletions .github/azure-gpu-test.yml
@@ -0,0 +1,64 @@
trigger:
  branches:
    include:
      - "main"
      - "wip"

pr:
  branches:
    include:
      - "main"
      - "wip"
      - "carmocca/*"

jobs:
  - job: testing
    timeoutInMinutes: "20"
    cancelTimeoutInMinutes: "2"
    pool: "lit-rtx-3090"
    variables:
      DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
    container:
      image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.2-cuda12.1.0"
      options: "--gpus=all --shm-size=8gb"
    workspace:
      clean: all
    steps:

    - bash: |
        echo "##vso[task.setvariable variable=CUDA_VISIBLE_DEVICES]$(DEVICES)"
      displayName: 'set env. vars'

    - bash: |
        echo $(DEVICES)
        echo $CUDA_VISIBLE_DEVICES
        whereis nvidia
        nvidia-smi
        which python && which pip
        python --version
        pip --version
        pip list
      displayName: "Image info & NVIDIA"

    - script: |
        pip install -r requirements-all.txt pytest pytest-rerunfailures 'transformers>=4.38.0' einops protobuf
      displayName: 'Install dependencies'

    - bash: |
        set -e
        pip list
        python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu == 2, f'GPU: {mgpu}'"
      displayName: "Env details"

    - bash: pytest -v --disable-pytest-warnings --strict-markers --color=yes
      displayName: 'Ordinary tests'
      env:
        PL_RUN_CUDA_TESTS: "1"
      timeoutInMinutes: "5"

    - bash: bash run_standalone_tests.sh
      workingDirectory: tests
      env:
        PL_RUN_CUDA_TESTS: "1"
      displayName: "Standalone tests"
      timeoutInMinutes: "5"
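Note on the `DEVICES` variable defined above: it extracts a GPU index from the Azure agent name so that `CUDA_VISIBLE_DEVICES` pins each job to its own card. A minimal sketch of that parsing, assuming agent names end in `_<index>` (the example name below is hypothetical):

```python
# Sketch of the DEVICES expression used in the pipeline above.
# Assumes the agent name ends in an underscore followed by the GPU index,
# e.g. "lit-rtx-3090_1" -> "1". The example name is illustrative.
def gpu_index_from_agent_name(agent_name: str) -> str:
    return agent_name.split("_")[-1]

assert gpu_index_from_agent_name("lit-rtx-3090_1") == "1"
```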
75 changes: 75 additions & 0 deletions .github/workflows/cpu-tests.yml
@@ -0,0 +1,75 @@
name: CPU tests

on:
  push:
    branches: [main, wip]
  pull_request:
    branches: [main, "carmocca/*", wip]

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

defaults:
  run:
    shell: bash

env:
  HF_TOKEN: ${{ secrets.HF_TOKEN }}

jobs:
  cpu-tests:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - {os: "macOS-12", python-version: "3.10"}
          - {os: "ubuntu-22.04", python-version: "3.11"}
          - {os: "ubuntu-22.04", python-version: "3.10"}
          - {os: "ubuntu-22.04", python-version: "3.9"}
          - {os: "ubuntu-20.04", python-version: "3.8"}
          - {os: "windows-2022", python-version: "3.10"}
    timeout-minutes: 25

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: |
            requirements.txt
            requirements-all.txt
            setup.py

      - name: Install minimal dependencies
        run: |
          pip install -r requirements.txt
          pip list
          # make sure all modules are importable
          modules=$(
            find * -type f -name "*.py" | \
              grep -v tests | grep "/" | grep -v lm_eval | grep -v xla | grep -v prepare_slimpajama | grep -v prepare_starcoder | \
              sed 's/\.py$//' | sed 's/\//./g' | \
              sed 's/.__init__//g' | xargs -I {} echo "import {};"
          )
          echo "$modules"
          python -c "$modules"

      - name: Install all dependencies
        run: |
          pip install -r requirements-all.txt pytest pytest-rerunfailures pytest-timeout 'transformers>=4.38.0' einops protobuf
          pip list

      - name: Run tests without the package installed
        run: |
          pytest -v --disable-pytest-warnings --strict-markers --color=yes --timeout 120

      - name: Run tests
        run: |
          pip install . --no-deps

          pytest -v --disable-pytest-warnings --strict-markers --color=yes --timeout 120
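The "Install minimal dependencies" step above builds a list of `import` statements from the repository's Python files and executes it, so any import error surfaces even when only `requirements.txt` is installed. A rough Python equivalent of that shell pipeline, assuming the `grep -v` filters map to the exclusion list below:

```python
# Approximate Python equivalent of the importability check in the workflow.
# Paths are converted to dotted module names; the exclusion list mirrors the
# grep -v filters; top-level scripts are skipped like the `grep "/"` step.
from pathlib import Path

EXCLUDED = ("tests", "lm_eval", "xla", "prepare_slimpajama", "prepare_starcoder")

statements = []
for path in Path(".").glob("**/*.py"):
    if len(path.parts) < 2 or any(name in str(path) for name in EXCLUDED):
        continue  # skip top-level scripts and excluded directories
    module = ".".join(p for p in path.with_suffix("").parts if p != "__init__")
    statements.append(f"import {module}")

exec("\n".join(statements))  # raises ImportError if any module fails to import
```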
5 changes: 4 additions & 1 deletion .gitignore
@@ -5,6 +5,7 @@ __pycache__
build
.venv
.vscode
myenv

# data
data
@@ -15,5 +16,7 @@ checkpoints
out
wandb
events.out.tfevents*

*.out
*.csv
tests/reference_models
output/
38 changes: 3 additions & 35 deletions finetune/lora.py
@@ -31,36 +31,6 @@
)
from scripts.prepare_alpaca import generate_prompt

#
eval_interval = 100
save_interval = 100
eval_iters = 100
eval_max_new_tokens = 100
log_interval = 1
devices = 1

# Hyperparameters
learning_rate = 3e-4
batch_size = 128
# Reduced to fit on a single T4 GPU
micro_batch_size = 2
gradient_accumulation_iters = batch_size // micro_batch_size
assert gradient_accumulation_iters > 0
max_iters = 50000 # train dataset size
weight_decay = 0.01
lora_r = 16
lora_alpha = 32
lora_dropout = 0.05
lora_query = True
lora_key = True
lora_value = True
lora_projection = True
lora_mlp = True
lora_head = True
warmup_steps = 100

hparams = {k: v for k, v in locals().items() if isinstance(v, (int, float, str)) and not k.startswith("_")}
#

def setup(
    precision: Optional[str] = None,
@@ -127,7 +97,7 @@ def setup(

    if not any((lora_query, lora_key, lora_value, lora_projection, lora_mlp, lora_head)):
        fabric.print("Warning: all LoRA layers are disabled!")
    fabric.print(hparams)

    fabric.launch(
        main,
        devices,
@@ -150,6 +120,7 @@ def setup(
    )



def main(fabric: L.Fabric, devices: int, seed: int, config: Config, io: IOArgs, train: TrainArgs, eval: EvalArgs) -> None:
    validate_args(io, train, eval)

Expand Down Expand Up @@ -221,10 +192,7 @@ def fit(
    tokenizer = Tokenizer(io.checkpoint_dir)
    longest_seq_length, longest_seq_ix = get_longest_seq_length(train_data)

    # The existing code model.max_seq_length = longest_seq_length
    # sets the maximum length based on the training data, which seems too small. Hence it was set to a hardcoded number.
    model.max_seq_length = 500
    # model.max_seq_length = min(longest_seq_length, train.max_seq_length or float("inf"))
    model.max_seq_length = min(longest_seq_length, train.max_seq_length or float("inf"))

    fabric.print(
        f"The longest sequence length in the train data is {longest_seq_length}, the model's maximum sequence length is"
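This hunk drops the hardcoded `model.max_seq_length = 500` and restores the upstream rule: clamp to the longest training sequence, optionally capped by `train.max_seq_length`. A minimal sketch of that rule with illustrative numbers:

```python
# Sketch of the restored max_seq_length clamping; the numbers are illustrative.
longest_seq_length = 512     # longest tokenized sample found in the training data
train_max_seq_length = None  # train.max_seq_length; None means no explicit cap

max_seq_length = min(longest_seq_length, train_max_seq_length or float("inf"))
assert max_seq_length == 512  # without an explicit cap, the data decides

train_max_seq_length = 256
assert min(longest_seq_length, train_max_seq_length or float("inf")) == 256  # explicit cap wins when smaller
```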
2 changes: 1 addition & 1 deletion generate/lora_ui_gen.py
@@ -120,7 +120,7 @@ def main(

fabric.print(f"Loading model {str(checkpoint_path)!r} with {config.__dict__}", file=sys.stderr)
t0 = time.perf_counter()
#with fabric.init_module(empty_init=True), gptq_quantization(quantize == "gptq.int4"):

with fabric.init_module(empty_init=True):
model = GPT(config)
fabric.print(f"Time to instantiate model: {time.perf_counter() - t0:.02f} seconds.", file=sys.stderr)
10 changes: 0 additions & 10 deletions scripts/merge_lora.py
@@ -16,16 +16,6 @@
from lit_gpt.lora import GPT, Config, lora_filter, merge_lora_weights
from lit_gpt.utils import CLI, check_valid_checkpoint_dir, get_default_supported_precision, lazy_load

lora_r = 16
lora_alpha = 32
lora_dropout = 0.05
lora_query = True
lora_key = True
lora_value = True
lora_projection = True
lora_mlp = True
lora_head = True

def merge_lora(
    lora_path: Path = Path("out/lora/alpaca/lit_model_lora_finetuned.pth"),
    checkpoint_dir: Path = Path("checkpoints/stabilityai/stablelm-base-alpha-3b"),
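For context on the deleted constants: `lora_r` and `lora_alpha` parameterize the low-rank update that `merge_lora_weights` folds back into the base weights. A minimal sketch of the standard LoRA merge, not the library's actual implementation (tensor names and shapes are illustrative):

```python
# Standard LoRA weight merge sketched with plain tensors:
# W' = W + (alpha / r) * (B @ A)
import torch

out_features, in_features, r, alpha = 64, 32, 16, 32
W = torch.randn(out_features, in_features)  # frozen pretrained weight
A = torch.randn(r, in_features)             # LoRA "down" projection
B = torch.zeros(out_features, r)            # LoRA "up" projection (zero-initialized)

W_merged = W + (alpha / r) * (B @ A)        # fold the adapter back into the base weight
assert W_merged.shape == W.shape
```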
1 change: 1 addition & 0 deletions scripts/prepare_ui_gen_data.py
@@ -113,6 +113,7 @@ def prepare_sample(example: dict, tokenizer: Tokenizer, max_length: int, mask_in
    in the label that correspond to the original input prompt get masked out (default).
    """
    full_prompt = generate_prompt(example)
    print(full_prompt)
    full_prompt_and_response = full_prompt + example["output"]
    encoded_full_prompt = tokenizer.encode(full_prompt, max_length=max_length)
    encoded_full_prompt_and_response = tokenizer.encode(full_prompt_and_response, eos=True, max_length=max_length)
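The docstring above refers to masking the prompt portion of the labels so the loss is computed only on the response. A minimal sketch of that masking, assuming a `-1` ignore value and illustrative token ids (not the script's exact code):

```python
# Sketch of prompt masking for instruction tuning; token ids and the -1
# ignore value are illustrative assumptions.
import torch

ignore_index = -1
encoded_full_prompt = torch.tensor([5, 6, 7])                        # prompt tokens
encoded_full_prompt_and_response = torch.tensor([5, 6, 7, 8, 9, 2])  # prompt + response + eos

labels = encoded_full_prompt_and_response.clone()
labels[: len(encoded_full_prompt)] = ignore_index  # loss is computed only on the response

assert labels.tolist() == [-1, -1, -1, 8, 9, 2]
```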