bugfix: CUDA acceleration not working (#2475)
* bugfix: CUDA acceleration not working

CUDA stopped working after #2286.
Refactored the code to be cleaner.

* Update requirements.txt

Add missing dependencies.

Signed-off-by: fakezeta <[email protected]>

* Update requirements.txt

Signed-off-by: fakezeta <[email protected]>

---------

Signed-off-by: fakezeta <[email protected]>
fakezeta authored Jun 3, 2024
1 parent daa7544 commit 6ef78ef
Showing 3 changed files with 16 additions and 8 deletions.
13 changes: 6 additions & 7 deletions backend/python/transformers/backend.py
@@ -21,10 +21,7 @@


 XPU=os.environ.get("XPU", "0") == "1"
-if XPU:
-    from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria
-else:
-    from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria
+from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria


 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
@@ -77,18 +74,19 @@ def LoadModel(self, request, context):
         """
         model_name = request.Model

-        compute = "auto"
+        compute = torch.float16
         if request.F16Memory == True:
            compute=torch.bfloat16

-        self.CUDA = request.CUDA
+        self.CUDA = torch.cuda.is_available()
         self.OV=False

         device_map="cpu"

         quantization = None

         if self.CUDA:
+            from transformers import BitsAndBytesConfig, AutoModelForCausalLM
             if request.MainGPU:
                 device_map=request.MainGPU
             else:
@@ -107,7 +105,7 @@ def LoadModel(self, request, context):
                     bnb_4bit_compute_dtype = None,
                     load_in_8bit=True,
                 )

         try:
             if request.Type == "AutoModelForCausalLM":
                 if XPU:
@@ -189,6 +187,7 @@ def LoadModel(self, request, context):
                                                     device=device_map)
                 self.OV = True
             else:
+                print("Automodel", file=sys.stderr)
                 self.model = AutoModel.from_pretrained(model_name,
                                                        trust_remote_code=request.TrustRemoteCode,
                                                        use_safetensors=True,
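The gist of the backend.py change, as a minimal standalone sketch (load_model is a hypothetical helper, not the backend's actual LoadModel signature): detect CUDA at runtime instead of trusting the request flag, and defer CUDA-only imports so the module still loads on CPU/XPU images where bitsandbytes is absent.

import torch

def load_model(model_name: str, f16: bool = False):
    # Trust the runtime, not the request flag: a stale flag is
    # what silently disabled CUDA acceleration in the first place.
    use_cuda = torch.cuda.is_available()
    compute = torch.bfloat16 if f16 else torch.float16

    if use_cuda:
        # Deferred import: BitsAndBytesConfig pulls in bitsandbytes,
        # which is only installed on CUDA images.
        from transformers import AutoModelForCausalLM, BitsAndBytesConfig
        return AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="cuda",
            torch_dtype=compute,
            quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        )

    from transformers import AutoModel
    return AutoModel.from_pretrained(model_name, torch_dtype=compute)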
5 changes: 4 additions & 1 deletion backend/python/transformers/requirements.txt
@@ -3,4 +3,7 @@ transformers
 grpcio==1.64.0
 protobuf
 torch
-certifi
+certifi
+intel-extension-for-transformers
+bitsandbytes
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
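A quick smoke test for the dependencies added above (hypothetical, not part of the repo): confirm both new packages import and that torch sees a GPU.

# Hypothetical smoke test, run inside the backend's environment.
import torch
import bitsandbytes                       # CUDA quantization backend
import intel_extension_for_transformers   # Intel/XPU path

print("torch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())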
6 changes: 6 additions & 0 deletions backend/python/transformers/run.sh
@@ -1,4 +1,10 @@
 #!/bin/bash
 source $(dirname $0)/../common/libbackend.sh

+if [ -d "/opt/intel" ]; then
+    # Assumes we are using the Intel oneAPI container image
+    # https://github.com/intel/intel-extension-for-pytorch/issues/538
+    export XPU=1
+fi
+
 startBackend $@
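On the Python side, backend.py consumes the XPU flag exported above; a minimal sketch of the resulting device selection (the "xpu" device string is an assumption based on Intel-extension conventions, not shown in this diff):

import os
import torch

# run.sh exports XPU=1 when /opt/intel exists (oneAPI image);
# backend.py reads it to choose the Intel code path.
XPU = os.environ.get("XPU", "0") == "1"
if XPU:
    device = "xpu"   # assumption: IPEX-style device string
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"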
