From be00c2f1d783904508cfd4e21803cb43c6fbdce8 Mon Sep 17 00:00:00 2001 From: Milan Kordic Date: Mon, 9 Dec 2024 17:21:03 -0500 Subject: [PATCH] add generic setup_llama.sh script --- README.md | 4 +- models/demos/t3000/llama3_70b/README.md | 4 +- .../{setup_llama3.sh => setup_llama.sh} | 140 ++++++++++++++++-- 3 files changed, 129 insertions(+), 19 deletions(-) rename models/demos/t3000/llama3_70b/{setup_llama3.sh => setup_llama.sh} (51%) diff --git a/README.md b/README.md index f47aaeb927d..d06621a08fa 100644 --- a/README.md +++ b/README.md @@ -22,9 +22,9 @@ ## LLMs -| Model | Batch | Hardware | ttft (ms) | t/s/u | Target
t/s/u | t/s | TT-Metalium Release | vLLM Release | +| Model | Batch | Hardware | ttft (ms) | t/s/u | Target
t/s/u | t/s | TT-Metalium Release | vLLM Tenstorrent Repo Release | |---------------------------------------------------------------|-------|----------------------------------------------------------|-----------|-------|-----------------|--------|---------------------------------------------------|---------------------------------------------------------------------------------------------------| -| [Falcon 7B (decode only)](./models/demos/ttnn_falcon7b) | 32 | [e150](https://tenstorrent.com/hardware/grayskull) | | 4.2 | 4.4 | 134.4 | | | +| [Falcon 7B (decode only)](./models/demos/ttnn_falcon7b) | 32 | [e150](https://tenstorrent.com/hardware/grayskull) | | 4.2 | 4.4 | 134.4 | | | | [Falcon 7B](./models/demos/wormhole/falcon7b) | 32 | [n150](https://tenstorrent.com/hardware/wormhole) | 71 | 17.6 | 26 | 563.2 | [v0.53.0-rc44](https://github.com/tenstorrent/tt-metal/tree/v0.53.0-rc44) | | | [Mistral 7B](./models/demos/wormhole/mistral7b) | 32 | [n150](https://tenstorrent.com/hardware/wormhole) | | 9.9 | 25 | 316.8 | [v0.51.0-rc28](https://github.com/tenstorrent/tt-metal/tree/v0.51.0-rc28) | | | [Mamba 2.8B](./models/demos/wormhole/mamba) | 32 | [n150](https://tenstorrent.com/hardware/wormhole) | 48 | 12.3 | 41 | 393.6 | [v0.51.0-rc26](https://github.com/tenstorrent/tt-metal/tree/v0.51.0-rc26) | | diff --git a/models/demos/t3000/llama3_70b/README.md b/models/demos/t3000/llama3_70b/README.md index 66c44157e55..80f344040d4 100644 --- a/models/demos/t3000/llama3_70b/README.md +++ b/models/demos/t3000/llama3_70b/README.md @@ -10,7 +10,7 @@ ## One command run ```bash -chmod +x ./models/demos/t3000/llama3_70b/setup_llama3.sh && ./models/demos/t3000/llama3_70b/setup_llama3.sh +chmod +x ./models/demos/t3000/llama3_70b/setup_llama.sh && ./models/demos/t3000/llama3_70b/setup_llama.sh ``` Where, `TT_METAL_COMMIT_SHA_OR_TAG` and `TT_VLLM_COMMIT_SHA_OR_TAG` are found in the root [README](/README.md#llms) under "Release" version, respectively. 
@@ -18,7 +18,7 @@ Where, `TT_METAL_COMMIT_SHA_OR_TAG` and `TT_VLLM_COMMIT_SHA_OR_TAG` are found in Example: ```bash -./models/demos/t3000/llama3_70b/setup_llama3.sh v0.53.0-rc36 384f1790c3be16e1d1b10de07252be2e66d00935 +./models/demos/t3000/llama3_70b/setup_llama.sh llama-3.1-70b-instruct v0.53.0-rc36 384f1790c3be16e1d1b10de07252be2e66d00935 ``` Follow prompts as they come up in CLI to select appropriate weights for Llama 3.1 70B Instruct. diff --git a/models/demos/t3000/llama3_70b/setup_llama3.sh b/models/demos/t3000/llama3_70b/setup_llama.sh similarity index 51% rename from models/demos/t3000/llama3_70b/setup_llama3.sh rename to models/demos/t3000/llama3_70b/setup_llama.sh index e3e042849a6..636ce070b2b 100644 --- a/models/demos/t3000/llama3_70b/setup_llama3.sh +++ b/models/demos/t3000/llama3_70b/setup_llama.sh @@ -7,20 +7,54 @@ set -euo pipefail -# Model information -MODEL=llama-3.1-70b-instruct -META_MODEL_NAME="Meta-Llama-3.1-70B-Instruct" -META_DIR_FILTER="llama3_1" +# Function to display usage information +usage() { + cat <<EOF +Usage: $0 <model_type> <tt_metal_commit_sha_or_tag> <tt_vllm_commit_sha_or_tag> + +Description: + This script sets up and deploys the Llama model along with its dependencies. + +Arguments: + <model_type> The type of model to deploy. Supported options: + - llama-3.1-70b-instruct + - llama-3.1-70b + - llama-3.1-8b-instruct + - llama-3.1-8b + - llama-3-70b-instruct + - llama-3-70b + - llama-3-8b-instruct + - llama-3-8b + <tt_metal_commit_sha_or_tag> The commit SHA or tag to use for TT_METAL. + <tt_vllm_commit_sha_or_tag> The commit SHA or tag to use for vLLM. + +Options: + -h, --help Display this help message.
+ +Examples: + # Deploy the llama-3.1-70b-instruct model + $0 llama-3.1-70b-instruct main dev + + # Deploy with specific commit SHAs + $0 llama-3.1-70b-instruct v0.53.0-rc36 384f1790c3be16e1d1b10de07252be2e66d00935 + +EOF + exit 0 +} + +# helper +if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then + usage +fi # Require commit SHA or tag for TT_METAL and vLLM -TT_METAL_COMMIT_SHA_OR_TAG=${1:-""} -TT_VLLM_COMMIT_SHA_OR_TAG=${2:-""} +TT_METAL_COMMIT_SHA_OR_TAG=${2:-""} +TT_VLLM_COMMIT_SHA_OR_TAG=${3:-""} # Ensure required arguments are passed if [[ -z "${TT_METAL_COMMIT_SHA_OR_TAG}" || -z "${TT_VLLM_COMMIT_SHA_OR_TAG}" ]]; then echo "āŒ Error: Both TT_METAL_COMMIT_SHA_OR_TAG and TT_VLLM_COMMIT_SHA_OR_TAG are required." - echo "Usage: $0 <TT_METAL_COMMIT_SHA_OR_TAG> <TT_VLLM_COMMIT_SHA_OR_TAG>" - exit 1 + usage fi # Defined variables @@ -37,11 +71,78 @@ print_step() { echo -e "\nšŸ‘‰ $1...\n" } +setup_model_environment() { + print_step "Setting up model environment for $1" + case "$1" in + "llama-3.1-70b-instruct") + MODEL="llama-3.1-70b-instruct" + META_MODEL_NAME="Meta-Llama-3.1-70B-Instruct" + META_DIR_FILTER="llama3_1" + REPACKED=1 + ;; + "llama-3.1-70b") + MODEL="llama-3.1-70b" + META_MODEL_NAME="Meta-Llama-3.1-70B" + META_DIR_FILTER="llama3_1" + REPACKED=1 + ;; + "llama-3.1-8b-instruct") + MODEL="llama-3.1-8b-instruct" + META_MODEL_NAME="Meta-Llama-3.1-8B-Instruct" + META_DIR_FILTER="llama3_1" + REPACKED=0 + ;; + "llama-3.1-8b") + MODEL="llama-3.1-8b" + META_MODEL_NAME="Meta-Llama-3.1-8B" + META_DIR_FILTER="llama3_1" + REPACKED=0 + ;; + "llama-3-70b-instruct") + MODEL="llama-3-70b-instruct" + META_MODEL_NAME="Meta-Llama-3-70B-Instruct" + META_DIR_FILTER="llama3" + REPACKED=1 + ;; + "llama-3-70b") + MODEL="llama-3-70b" + META_MODEL_NAME="Meta-Llama-3-70B" + META_DIR_FILTER="llama3" + REPACKED=1 + ;; + "llama-3-8b-instruct") + MODEL="llama-3-8b-instruct" + META_MODEL_NAME="Meta-Llama-3-8B-Instruct" + META_DIR_FILTER="llama3" + REPACKED=0 + ;; + "llama-3-8b") + MODEL="llama-3-8b" + META_MODEL_NAME="Meta-Llama-3-8B" + 
META_DIR_FILTER="llama3" + REPACKED=0 + ;; + *) + echo "ā›” Invalid model choice." + usage + exit 1 + ;; + esac + + if [ "${REPACKED}" -eq 1 ]; then + echo "REPACKED is enabled." + REPACKED_STR="repacked-" + else + echo "REPACKED is disabled." + REPACKED_STR="" + fi +} + setup_environment() { print_step "Setting up environment" - export LLAMA3_CKPT_DIR="${DEFAULT_PERSISTENT_VOLUME_ROOT}/model_weights/repacked-${MODEL}" + export LLAMA3_CKPT_DIR="${DEFAULT_PERSISTENT_VOLUME_ROOT}/model_weights/${REPACKED_STR}${MODEL}" export LLAMA3_TOKENIZER_PATH="${LLAMA3_CKPT_DIR}/tokenizer.model" - export LLAMA3_CACHE_PATH="${DEFAULT_PERSISTENT_VOLUME_ROOT}/tt_metal_cache/cache_repacked-${MODEL}" + export LLAMA3_CACHE_PATH="${DEFAULT_PERSISTENT_VOLUME_ROOT}/tt_metal_cache/cache_${REPACKED_STR}${MODEL}" export ARCH_NAME=wormhole_b0 export TT_METAL_HOME=$(pwd) export PYTHONPATH=$(pwd) @@ -100,12 +201,19 @@ setup_weights() { popd >/dev/null fi - print_step "Repacking weights" - source python_env/bin/activate huggingface-cli login - cp "${LLAMA_WEIGHTS_DIR}/tokenizer.model" "${WEIGHTS_DIR}/tokenizer.model" - cp "${LLAMA_WEIGHTS_DIR}/params.json" "${WEIGHTS_DIR}/params.json" - python models/demos/t3000/llama2_70b/scripts/repack_weights.py "${LLAMA_WEIGHTS_DIR}" "${WEIGHTS_DIR}" 5 + + if [ "${REPACKED}" -eq 1 ]; then + print_step "Repacking weights" + source python_env/bin/activate + cp "${LLAMA_WEIGHTS_DIR}/tokenizer.model" "${WEIGHTS_DIR}/tokenizer.model" + cp "${LLAMA_WEIGHTS_DIR}/params.json" "${WEIGHTS_DIR}/params.json" + python models/demos/t3000/llama2_70b/scripts/repack_weights.py "${LLAMA_WEIGHTS_DIR}" "${WEIGHTS_DIR}" 5 + else + cp -rf "${LLAMA_WEIGHTS_DIR}" "${WEIGHTS_DIR}" + fi + + echo "šŸ”” Using weights directory ${WEIGHTS_DIR}" } install_vllm() { @@ -132,6 +240,8 @@ deploy_server() { } # ---- MAIN ---- +MODEL_TYPE=$1 +setup_model_environment "$MODEL_TYPE" setup_environment check_and_build_tt_metal clone_repo "${DEFAULT_LLAMA_REPO}" 
"https://github.com/meta-llama/llama-models.git" "685ac4c107c75ce8c291248710bf990a876e1623"