From be00c2f1d783904508cfd4e21803cb43c6fbdce8 Mon Sep 17 00:00:00 2001 From: Milan Kordic Date: Mon, 9 Dec 2024 17:21:03 -0500 Subject: [PATCH] add generic setup_llama.sh script --- README.md | 4 +- models/demos/t3000/llama3_70b/README.md | 4 +- .../{setup_llama3.sh => setup_llama.sh} | 140 ++++++++++++++++-- 3 files changed, 129 insertions(+), 19 deletions(-) rename models/demos/t3000/llama3_70b/{setup_llama3.sh => setup_llama.sh} (51%) diff --git a/README.md b/README.md index f47aaeb927d..d06621a08fa 100644 --- a/README.md +++ b/README.md @@ -22,9 +22,9 @@ ## LLMs -| Model | Batch | Hardware | ttft (ms) | t/s/u | Target
t/s/u | t/s | TT-Metalium Release | vLLM Release | +| Model | Batch | Hardware | ttft (ms) | t/s/u | Target
t/s/u | t/s | TT-Metalium Release | vLLM Tenstorrent Repo Release | |---------------------------------------------------------------|-------|----------------------------------------------------------|-----------|-------|-----------------|--------|---------------------------------------------------|---------------------------------------------------------------------------------------------------| -| [Falcon 7B (decode only)](./models/demos/ttnn_falcon7b) | 32 | [e150](https://tenstorrent.com/hardware/grayskull) | | 4.2 | 4.4 | 134.4 | | | +| [Falcon 7B (decode only)](./models/demos/ttnn_falcon7b) | 32 | [e150](https://tenstorrent.com/hardware/grayskull) | | 4.2 | 4.4 | 134.4 | | | | [Falcon 7B](./models/demos/wormhole/falcon7b) | 32 | [n150](https://tenstorrent.com/hardware/wormhole) | 71 | 17.6 | 26 | 563.2 | [v0.53.0-rc44](https://github.com/tenstorrent/tt-metal/tree/v0.53.0-rc44) | | | [Mistral 7B](./models/demos/wormhole/mistral7b) | 32 | [n150](https://tenstorrent.com/hardware/wormhole) | | 9.9 | 25 | 316.8 | [v0.51.0-rc28](https://github.com/tenstorrent/tt-metal/tree/v0.51.0-rc28) | | | [Mamba 2.8B](./models/demos/wormhole/mamba) | 32 | [n150](https://tenstorrent.com/hardware/wormhole) | 48 | 12.3 | 41 | 393.6 | [v0.51.0-rc26](https://github.com/tenstorrent/tt-metal/tree/v0.51.0-rc26) | | diff --git a/models/demos/t3000/llama3_70b/README.md b/models/demos/t3000/llama3_70b/README.md index 66c44157e55..80f344040d4 100644 --- a/models/demos/t3000/llama3_70b/README.md +++ b/models/demos/t3000/llama3_70b/README.md @@ -10,7 +10,7 @@ ## One command run ```bash -chmod +x ./models/demos/t3000/llama3_70b/setup_llama3.sh && ./models/demos/t3000/llama3_70b/setup_llama3.sh +chmod +x ./models/demos/t3000/llama3_70b/setup_llama.sh && ./models/demos/t3000/llama3_70b/setup_llama.sh ``` Where, `TT_METAL_COMMIT_SHA_OR_TAG` and `TT_VLLM_COMMIT_SHA_OR_TAG` are found in the root [README](/README.md#llms) under "Release" version, respectively. 
@@ -18,7 +18,7 @@ Where, `TT_METAL_COMMIT_SHA_OR_TAG` and `TT_VLLM_COMMIT_SHA_OR_TAG` are found in Example: ```bash -./models/demos/t3000/llama3_70b/setup_llama3.sh v0.53.0-rc36 384f1790c3be16e1d1b10de07252be2e66d00935 +./models/demos/t3000/llama3_70b/setup_llama.sh llama-3.1-70b-instruct v0.53.0-rc36 384f1790c3be16e1d1b10de07252be2e66d00935 ``` Follow prompts as they come up in CLI to select appropriate weights for Llama 3.1 70B Instruct. diff --git a/models/demos/t3000/llama3_70b/setup_llama3.sh b/models/demos/t3000/llama3_70b/setup_llama.sh similarity index 51% rename from models/demos/t3000/llama3_70b/setup_llama3.sh rename to models/demos/t3000/llama3_70b/setup_llama.sh index e3e042849a6..636ce070b2b 100644 --- a/models/demos/t3000/llama3_70b/setup_llama3.sh +++ b/models/demos/t3000/llama3_70b/setup_llama.sh @@ -7,20 +7,54 @@ set -euo pipefail -# Model information -MODEL=llama-3.1-70b-instruct -META_MODEL_NAME="Meta-Llama-3.1-70B-Instruct" -META_DIR_FILTER="llama3_1" +# Function to display usage information +usage() { + cat <<EOF +Usage: $0 <model_type> <tt_metal_commit_sha_or_tag> <tt_vllm_commit_sha_or_tag> + +Description: + This script sets up and deploys the Llama model along with its dependencies. + +Arguments: + <model_type> The type of model to deploy. Supported options: + - llama-3.1-70b-instruct + - llama-3.1-70b + - llama-3.1-8b-instruct + - llama-3.1-8b + - llama-3-70b-instruct + - llama-3-70b + - llama-3-8b-instruct + - llama-3-8b + <tt_metal_commit_sha_or_tag> The commit SHA or tag to use for TT_METAL. + <tt_vllm_commit_sha_or_tag> The commit SHA or tag to use for vLLM. + +Options: + -h, --help Display this help message.
+ +Examples: + # Deploy the llama-3.1-70b-instruct model + $0 llama-3.1-70b-instruct main dev + + # Deploy with specific commit SHAs + $0 llama-3.1-70b-instruct v0.53.0-rc36 384f1790c3be16e1d1b10de07252be2e66d00935 + +EOF + exit 0 +} + +# helper +if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then + usage +fi # Require commit SHA or tag for TT_METAL and vLLM -TT_METAL_COMMIT_SHA_OR_TAG=${1:-""} -TT_VLLM_COMMIT_SHA_OR_TAG=${2:-""} +TT_METAL_COMMIT_SHA_OR_TAG=${2:-""} +TT_VLLM_COMMIT_SHA_OR_TAG=${3:-""} # Ensure required arguments are passed if [[ -z "${TT_METAL_COMMIT_SHA_OR_TAG}" || -z "${TT_VLLM_COMMIT_SHA_OR_TAG}" ]]; then echo "āŒ Error: Both TT_METAL_COMMIT_SHA_OR_TAG and TT_VLLM_COMMIT_SHA_OR_TAG are required." - echo "Usage: $0 <TT_METAL_COMMIT_SHA_OR_TAG> <TT_VLLM_COMMIT_SHA_OR_TAG>" - exit 1 + usage fi # Defined variables @@ -37,11 +71,78 @@ print_step() { echo -e "\nšŸ‘‰ $1...\n" } +setup_model_environment() { + print_step "Setting up model environment for $1" + case "$1" in + "llama-3.1-70b-instruct") + MODEL="llama-3.1-70b-instruct" + META_MODEL_NAME="Meta-Llama-3.1-70B-Instruct" + META_DIR_FILTER="llama3_1" + REPACKED=1 + ;; + "llama-3.1-70b") + MODEL="llama-3.1-70b" + META_MODEL_NAME="Meta-Llama-3.1-70B" + META_DIR_FILTER="llama3_1" + REPACKED=1 + ;; + "llama-3.1-8b-instruct") + MODEL="llama-3.1-8b-instruct" + META_MODEL_NAME="Meta-Llama-3.1-8B-Instruct" + META_DIR_FILTER="llama3_1" + REPACKED=0 + ;; + "llama-3.1-8b") + MODEL="llama-3.1-8b" + META_MODEL_NAME="Meta-Llama-3.1-8B" + META_DIR_FILTER="llama3_1" + REPACKED=0 + ;; + "llama-3-70b-instruct") + MODEL="llama-3-70b-instruct" + META_MODEL_NAME="Meta-Llama-3-70B-Instruct" + META_DIR_FILTER="llama3" + REPACKED=1 + ;; + "llama-3-70b") + MODEL="llama-3-70b" + META_MODEL_NAME="Meta-Llama-3-70B" + META_DIR_FILTER="llama3" + REPACKED=1 + ;; + "llama-3-8b-instruct") + MODEL="llama-3-8b-instruct" + META_MODEL_NAME="Meta-Llama-3-8B-Instruct" + META_DIR_FILTER="llama3" + REPACKED=0 + ;; + "llama-3-8b") + MODEL="llama-3-8b" + META_MODEL_NAME="Meta-Llama-3-8B" + 
META_DIR_FILTER="llama3" + REPACKED=0 + ;; + *) + echo "ā›” Invalid model choice." + usage + exit 1 + ;; + esac + + if [ "${REPACKED}" -eq 1 ]; then + echo "REPACKED is enabled." + REPACKED_STR="repacked-" + else + echo "REPACKED is disabled." + REPACKED_STR="" + fi +} + setup_environment() { print_step "Setting up environment" - export LLAMA3_CKPT_DIR="${DEFAULT_PERSISTENT_VOLUME_ROOT}/model_weights/repacked-${MODEL}" + export LLAMA3_CKPT_DIR="${DEFAULT_PERSISTENT_VOLUME_ROOT}/model_weights/${REPACKED_STR}${MODEL}" export LLAMA3_TOKENIZER_PATH="${LLAMA3_CKPT_DIR}/tokenizer.model" - export LLAMA3_CACHE_PATH="${DEFAULT_PERSISTENT_VOLUME_ROOT}/tt_metal_cache/cache_repacked-${MODEL}" + export LLAMA3_CACHE_PATH="${DEFAULT_PERSISTENT_VOLUME_ROOT}/tt_metal_cache/cache_${REPACKED_STR}${MODEL}" export ARCH_NAME=wormhole_b0 export TT_METAL_HOME=$(pwd) export PYTHONPATH=$(pwd) @@ -100,12 +201,19 @@ setup_weights() { popd >/dev/null fi - print_step "Repacking weights" - source python_env/bin/activate huggingface-cli login - cp "${LLAMA_WEIGHTS_DIR}/tokenizer.model" "${WEIGHTS_DIR}/tokenizer.model" - cp "${LLAMA_WEIGHTS_DIR}/params.json" "${WEIGHTS_DIR}/params.json" - python models/demos/t3000/llama2_70b/scripts/repack_weights.py "${LLAMA_WEIGHTS_DIR}" "${WEIGHTS_DIR}" 5 + + if [ "${REPACKED}" -eq 1 ]; then + print_step "Repacking weights" + source python_env/bin/activate + cp "${LLAMA_WEIGHTS_DIR}/tokenizer.model" "${WEIGHTS_DIR}/tokenizer.model" + cp "${LLAMA_WEIGHTS_DIR}/params.json" "${WEIGHTS_DIR}/params.json" + python models/demos/t3000/llama2_70b/scripts/repack_weights.py "${LLAMA_WEIGHTS_DIR}" "${WEIGHTS_DIR}" 5 + else + cp -rf "${LLAMA_WEIGHTS_DIR}" "${WEIGHTS_DIR}" + fi + + echo "šŸ”” Using weights directory ${WEIGHTS_DIR}" } install_vllm() { @@ -132,6 +240,8 @@ deploy_server() { } # ---- MAIN ---- +MODEL_TYPE=$1 +setup_model_environment "$MODEL_TYPE" setup_environment check_and_build_tt_metal clone_repo "${DEFAULT_LLAMA_REPO}" 
"https://github.com/meta-llama/llama-models.git" "685ac4c107c75ce8c291248710bf990a876e1623"