Skip to content

Commit

Permalink
[docs] use device-agnostic instead of cuda (#35047)
Browse files Browse the repository at this point in the history
* fix on xpu

* [run_all]

* add the missing import for Image lib

* add more devices in comment

* bug fix

* replace cuda
  • Loading branch information
faaany authored Dec 3, 2024
1 parent 346597b commit b8cdc26
Showing 1 changed file with 13 additions and 7 deletions.
20 changes: 13 additions & 7 deletions docs/source/en/perf_torch_compile.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ To compile any computer vision model of your choice, call `torch.compile()` on t
```diff
from transformers import AutoModelForImageClassification

- model = AutoModelForImageClassification.from_pretrained(MODEL_ID).to("cuda")
+ model = AutoModelForImageClassification.from_pretrained(MODEL_ID).to(DEVICE)
+ model = torch.compile(model)
```

Expand All @@ -47,15 +47,17 @@ from PIL import Image
import requests
import numpy as np
from transformers import AutoImageProcessor, AutoModelForImageClassification
from accelerate.test_utils.testing import get_backend

device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
image = Image.open(requests.get(url, stream=True).raw)

processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
model = AutoModelForImageClassification.from_pretrained("google/vit-base-patch16-224").to(device)
model = torch.compile(model)

processed_input = processor(image, return_tensors='pt').to(device)

with torch.no_grad():
_ = model(**processed_input)
Expand All @@ -66,13 +68,15 @@ with torch.no_grad():

```python
from transformers import AutoImageProcessor, AutoModelForObjectDetection
from accelerate.test_utils.testing import get_backend

device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
processor = AutoImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = AutoModelForObjectDetection.from_pretrained("facebook/detr-resnet-50").to(device)
model = torch.compile(model)

texts = ["a photo of a cat", "a photo of a dog"]
inputs = processor(text=texts, images=image, return_tensors="pt").to(device)

with torch.no_grad():
_ = model(**inputs)
Expand All @@ -82,11 +86,13 @@ with torch.no_grad():

```python
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
from accelerate.test_utils.testing import get_backend

device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
processor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512").to(device)
model = torch.compile(model)
seg_inputs = processor(images=image, return_tensors="pt").to(device)

with torch.no_grad():
_ = model(**seg_inputs)
Expand Down

0 comments on commit b8cdc26

Please sign in to comment.