From c233512e41bdfc530929b419bd0a95d4a6bef3f9 Mon Sep 17 00:00:00 2001
From: Kenny Ning <kennyning@gmail.com>
Date: Thu, 8 Feb 2024 09:04:21 -0800
Subject: [PATCH] update tgi model example (#575)

* update llama revision and image

* pin python

* fix black
---
 06_gpu_and_ml/text_generation_inference.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/06_gpu_and_ml/text_generation_inference.py b/06_gpu_and_ml/text_generation_inference.py
index c5ca52916..a8b4f1751 100644
--- a/06_gpu_and_ml/text_generation_inference.py
+++ b/06_gpu_and_ml/text_generation_inference.py
@@ -26,7 +26,7 @@
 
 GPU_CONFIG = gpu.A100(memory=80, count=2)
 MODEL_ID = "meta-llama/Llama-2-70b-chat-hf"
-REVISION = "36d9a7388cc80e5f4b3e9701ca2f250d21a96c30"
+REVISION = "e1ce257bd76895e0864f3b4d6c7ed3c4cdec93e2"
 # Add `["--quantize", "gptq"]` for TheBloke GPTQ models.
 LAUNCH_FLAGS = [
     "--model-id",
@@ -84,7 +84,9 @@ def download_model():
 stub = Stub("example-tgi-" + MODEL_ID.split("/")[-1])
 
 tgi_image = (
-    Image.from_registry("ghcr.io/huggingface/text-generation-inference:1.0.3")
+    Image.from_registry(
+        "ghcr.io/huggingface/text-generation-inference:1.4", add_python="3.10"
+    )
     .dockerfile_commands("ENTRYPOINT []")
     .run_function(
         download_model, secrets=[Secret.from_name("huggingface-secret")]