Skip to content

Commit

Permalink
Feature backend Model as subtitle
Browse files (browse the repository at this point in the history)
  • Loading branch information
gongy committed Nov 13, 2023
1 parent 8f0afc0 commit fe9c43f
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 5 deletions.
4 changes: 3 additions & 1 deletion 06_gpu_and_ml/llm-frontend/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@
</a>
</div>
<div class="text-4xl mt-4 mb-4 font-semibold tracking-tighter text-center">
LLaMA 2 70B
Modal LLM Engine
</div>
<div x-show="info.loaded && info.model" x-text="info.model" class="text-2xl mt-4 mb-4 font-medium tracking-tighter text-center">
</div>

<div class="flex flex-wrap justify-center items-center mt-8 mb-6">
Expand Down
9 changes: 5 additions & 4 deletions 06_gpu_and_ml/text_generation_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
#
# Any model supported by TGI can be chosen here.

GPU_CONFIG = gpu.A100(memory=80, count=2)
MODEL_ID = "meta-llama/Llama-2-70b-chat-hf"
REVISION = "36d9a7388cc80e5f4b3e9701ca2f250d21a96c30"
GPU_CONFIG = gpu.A100(memory=80, count=1)
MODEL_ID = "Phind/Phind-CodeLlama-34B-v2"
REVISION = "949f61e203f91b412efe8f679c798f09f0ff4b0c"
# Append `["--quantize", "gptq"]` to LAUNCH_FLAGS when serving TheBloke GPTQ models.
LAUNCH_FLAGS = [
"--model-id",
Expand Down Expand Up @@ -201,7 +201,7 @@ def main():
allow_concurrent_inputs=10,
timeout=60 * 10,
)
@asgi_app(label="tgi-app")
@asgi_app(label="codellama")
def app():
import json

Expand All @@ -217,6 +217,7 @@ async def stats():
return {
"backlog": stats.backlog,
"num_total_runners": stats.num_total_runners,
"model": MODEL_ID,
}

@web_app.get("/completion/{question}")
Expand Down

0 comments on commit fe9c43f

Please sign in to comment.