bugfix: CUDA acceleration not working
CUDA not working after #2286.
Refactored the code to be more polished.
fakezeta committed Jun 3, 2024
1 parent 3452773 commit 0d08ee6
Showing 3 changed files with 14 additions and 8 deletions.
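The heart of the fix is in backend.py: the backend now probes the runtime with `torch.cuda.is_available()` instead of trusting the `CUDA` flag on the gRPC request, which the commit message says stopped working after #2286. A minimal sketch of the pattern; the `"cuda:0"` fallback stands in for the real `request.MainGPU` handling:

import torch

# Probe the runtime instead of trusting a client-supplied flag; this is
# the core change the diff below makes to LoadModel.
use_cuda = torch.cuda.is_available()

# Illustrative only: the real backend takes the device from
# request.MainGPU when the caller provides one.
device_map = "cuda:0" if use_cuda else "cpu"
print(f"device_map = {device_map}")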
13 changes: 6 additions & 7 deletions backend/python/transformers/backend.py
@@ -21,10 +21,7 @@


XPU=os.environ.get("XPU", "0") == "1"
-if XPU:
-    from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria
-else:
-    from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria
+from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria


_ONE_DAY_IN_SECONDS = 60 * 60 * 24
@@ -77,18 +74,19 @@ def LoadModel(self, request, context):
        """
        model_name = request.Model

-        compute = "auto"
+        compute = torch.float16
        if request.F16Memory == True:
            compute=torch.bfloat16

-        self.CUDA = request.CUDA
+        self.CUDA = torch.cuda.is_available()
        self.OV=False

        device_map="cpu"

        quantization = None

        if self.CUDA:
+            from transformers import BitsAndBytesConfig, AutoModelForCausalLM
            if request.MainGPU:
                device_map=request.MainGPU
            else:
@@ -107,7 +105,7 @@
                bnb_4bit_compute_dtype = None,
                load_in_8bit=True,
            )
-
+
        try:
            if request.Type == "AutoModelForCausalLM":
                if XPU:
@@ -189,6 +187,7 @@ def LoadModel(self, request, context):
                                                                device=device_map)
                self.OV = True
            else:
+                print("Automodel", file=sys.stderr)
                self.model = AutoModel.from_pretrained(model_name,
                                                       trust_remote_code=request.TrustRemoteCode,
                                                       use_safetensors=True,
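Taken together, the backend.py changes defer the CUDA-only imports (`BitsAndBytesConfig`, `AutoModelForCausalLM`) until `torch.cuda.is_available()` confirms a GPU, so hosts without bitsandbytes (such as the XPU container) can still start the backend. A condensed sketch of the resulting flow; the plain function signature and the 8-bit-only quantization are simplifications of the real gRPC handler, not its full logic:

import torch
from transformers import AutoModel  # safe to import on every platform

def load_model(model_name: str, f16_memory: bool = False, main_gpu: str = ""):
    # Mirrors the compute-dtype selection in LoadModel: bfloat16 when the
    # caller asks for F16Memory, float16 otherwise.
    compute = torch.bfloat16 if f16_memory else torch.float16

    cuda = torch.cuda.is_available()  # runtime detection, not a request flag

    if cuda:
        # Imported only once CUDA is confirmed, so CPU/XPU hosts never
        # need bitsandbytes installed.
        from transformers import BitsAndBytesConfig, AutoModelForCausalLM

        device_map = main_gpu or "cuda:0"  # illustrative MainGPU fallback
        quantization = BitsAndBytesConfig(load_in_8bit=True)
        return AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=compute,
            device_map=device_map,
            quantization_config=quantization,
        )

    # CPU fallback, simplified from the backend's AutoModel branch.
    return AutoModel.from_pretrained(model_name, torch_dtype=compute)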
3 changes: 2 additions & 1 deletion backend/python/transformers/requirements.txt
@@ -3,4 +3,5 @@ transformers
grpcio==1.64.0
protobuf
torch
-certifi
+certifi
+intel-extension-for-transformers
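requirements.txt now pulls in intel-extension-for-transformers unconditionally, even though only the XPU path exercises it. An illustrative guard (not part of this patch) that fails loudly if XPU mode is requested but the wheel is missing:

import os

XPU = os.environ.get("XPU", "0") == "1"

if XPU:
    try:
        # Top-level import only; the backend's XPU code path is what
        # actually exercises the package.
        import intel_extension_for_transformers  # noqa: F401
    except ImportError as err:
        raise RuntimeError(
            "XPU mode requested but intel-extension-for-transformers is not installed"
        ) from err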
6 changes: 6 additions & 0 deletions backend/python/transformers/run.sh
@@ -1,4 +1,10 @@
#!/bin/bash
source $(dirname $0)/../common/libbackend.sh

+if [ -d "/opt/intel" ]; then
+    # Assumes we are using the Intel oneAPI container image
+    # https://github.com/intel/intel-extension-for-pytorch/issues/538
+    export XPU=1
+fi
+
startBackend $@
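run.sh only exports the flag; backend.py already reads it at import time (the `XPU=os.environ.get("XPU", "0") == "1"` line in the first diff above). A standalone illustration of the handshake between the launcher and the Python side:

import os

# Same convention the launcher and backend agree on: run.sh exports
# XPU=1 inside the Intel oneAPI image, and backend.py reads it here.
XPU = os.environ.get("XPU", "0") == "1"

print("XPU path" if XPU else "default (CPU/CUDA) path")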
