Skip to content

Commit

Permalink
[docs] use device-agnostic instead of cuda (#35047)
Browse files Browse the repository at this point in the history
* fix on xpu

* [run_all]

* add the missing import for Image lib

* add more devices in comment

* bug fix

* replace cuda
  • Loading branch information
faaany authored Dec 3, 2024
1 parent 346597b commit b8cdc26
Showing 1 changed file with 13 additions and 7 deletions.
20 changes: 13 additions & 7 deletions docs/source/en/perf_torch_compile.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ To compile any computer vision model of your choice, call `torch.compile()` on t
```diff
from transformers import AutoModelForImageClassification

- model = AutoModelForImageClassification.from_pretrained(MODEL_ID).to("cuda")
+ model = AutoModelForImageClassification.from_pretrained(MODEL_ID).to(DEVICE)
+ model = torch.compile(model)
```

Expand All @@ -47,15 +47,17 @@ from PIL import Image
import requests
import numpy as np
from transformers import AutoImageProcessor, AutoModelForImageClassification
from accelerate.test_utils.testing import get_backend

device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
image = Image.open(requests.get(url, stream=True).raw)

processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
model = AutoModelForImageClassification.from_pretrained("google/vit-base-patch16-224").to(device)
model = torch.compile(model)

processed_input = processor(image, return_tensors='pt').to(device)

with torch.no_grad():
_ = model(**processed_input)
Expand All @@ -66,13 +68,15 @@ with torch.no_grad():

```python
from transformers import AutoImageProcessor, AutoModelForObjectDetection
from accelerate.test_utils.testing import get_backend

device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
processor = AutoImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = AutoModelForObjectDetection.from_pretrained("facebook/detr-resnet-50").to(device)
model = torch.compile(model)

texts = ["a photo of a cat", "a photo of a dog"]
inputs = processor(text=texts, images=image, return_tensors="pt").to(device)

with torch.no_grad():
_ = model(**inputs)
Expand All @@ -82,11 +86,13 @@ with torch.no_grad():

```python
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
from accelerate.test_utils.testing import get_backend

device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.)
processor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512").to(device)
model = torch.compile(model)
seg_inputs = processor(images=image, return_tensors="pt").to(device)

with torch.no_grad():
_ = model(**seg_inputs)
Expand Down

0 comments on commit b8cdc26

Please sign in to comment.