From f444aa9fbce36a6189ce636858fc016adf7c772e Mon Sep 17 00:00:00 2001
From: Erik Bernhardsson
Date: Thu, 21 Dec 2023 09:44:08 -0500
Subject: [PATCH] Rewrite alpaca-lora

---
 06_gpu_and_ml/alpaca/alpaca_lora.py | 41 +++++++++++++----------------
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/06_gpu_and_ml/alpaca/alpaca_lora.py b/06_gpu_and_ml/alpaca/alpaca_lora.py
index 81c75bb2b..fb1c1e0d4 100644
--- a/06_gpu_and_ml/alpaca/alpaca_lora.py
+++ b/06_gpu_and_ml/alpaca/alpaca_lora.py
@@ -1,6 +1,6 @@
 import sys
 
-from modal import Image, Stub, method
+from modal import Image, Stub, build, enter, method
 
 # Define a function for downloading the models, that will run once on image build.
 # This allows the weights to be present inside the image for faster startup.
@@ -8,18 +8,6 @@
 base_model = "luodian/llama-7b-hf"
 lora_weights = "tloen/alpaca-lora-7b"
 
-
-def download_models():
-    from peft import PeftModel
-    from transformers import LlamaForCausalLM, LlamaTokenizer
-
-    model = LlamaForCausalLM.from_pretrained(
-        base_model,
-    )
-    PeftModel.from_pretrained(model, lora_weights)
-    LlamaTokenizer.from_pretrained(base_model)
-
-
 # Alpaca-LoRA is distributed as a public GitHub repository and the repository is not
 # installable by `pip`, so instead we install the repository by cloning it into our Modal
 # image.
@@ -58,8 +46,14 @@ def download_models():
         "torchvision~=0.16",
         "sentencepiece==0.1.99",
     )
-    .run_function(download_models)
 )
+
+with image.imports():
+    import torch
+    from generate import generate_prompt
+    from peft import PeftModel
+    from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer
+
 stub = Stub(name="example-alpaca-lora", image=image)
 
 # The Alpaca-LoRA model is integrated into Modal as a Python class with an __enter__
@@ -73,16 +67,21 @@ def download_models():
 
 @stub.cls(gpu="A10G")
 class AlpacaLoRAModel:
-    def __enter__(self):
+    @build()
+    def download_models(self):
+        model = LlamaForCausalLM.from_pretrained(
+            base_model,
+        )
+        PeftModel.from_pretrained(model, lora_weights)
+        LlamaTokenizer.from_pretrained(base_model)
+
+    @enter()
+    def enter(self):
         """
         Container-lifecycle method for model setup. Code is taken from
         https://github.com/tloen/alpaca-lora/blob/main/generate.py and minor
         modifications are made to support usage in a Python class.
         """
-        import torch
-        from peft import PeftModel
-        from transformers import LlamaForCausalLM, LlamaTokenizer
-
         load_8bit = False
         device = "cuda" if torch.cuda.is_available() else "cpu"
 
@@ -146,10 +145,6 @@ def evaluate(
         max_new_tokens=128,
         **kwargs,
    ):
-        import torch
-        from generate import generate_prompt
-        from transformers import GenerationConfig
-
         prompt = generate_prompt(instruction, input)
         inputs = self.tokenizer(prompt, return_tensors="pt")
         input_ids = inputs["input_ids"].to(self.device)
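
A note on the `with image.imports()` block that replaces the deleted function-local
imports: it defers imports of packages that only exist inside the image, so the module
can still be imported on a local machine where those packages are missing. A minimal
sketch of the pattern under that assumption (package choice illustrative, not from
this patch):

    from modal import Image, Stub

    # Image with a dependency that may not be installed locally.
    image = Image.debian_slim().pip_install("torch")

    with image.imports():
        # Imported inside the Modal container; on a local run where
        # torch is absent, the ImportError is deferred rather than raised
        # at module load time.
        import torch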
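
The rewrite splits the old `__enter__` method across Modal's two container-lifecycle
hooks: `@build()` runs once while the image is built, so the downloaded weights are
baked into the image (preserving the intent of the deleted
`run_function(download_models)`), and `@enter()` runs once per container start to load
the model into memory. A minimal sketch of the split, with hypothetical names
(`MyModel`, `fetch_weights`, `load`, `predict`) not taken from this patch:

    from modal import Stub, build, enter, method

    stub = Stub(name="lifecycle-sketch")

    @stub.cls()
    class MyModel:
        @build()  # image-build time: results are snapshotted into the image
        def fetch_weights(self):
            ...  # e.g. download model files to the filesystem

        @enter()  # container-start time: runs once before any method call
        def load(self):
            ...  # e.g. read weights from the image into GPU memory

        @method()  # request time: runs on every remote call
        def predict(self, prompt: str) -> str:
            ...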
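
For completeness, a hypothetical caller for the rewritten class, assuming `evaluate`
is decorated with `@method()` later in the file (the entrypoint name and prompt are
illustrative, not part of this patch):

    @stub.local_entrypoint()
    def main():
        model = AlpacaLoRAModel()
        # .remote() runs evaluate inside the A10G container defined above.
        print(model.evaluate.remote("Tell me about alpacas."))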