Skip to content

Commit

Permalink
add generic setup_llama.sh script
Browse files Browse the repository at this point in the history
  • Loading branch information
milank94 committed Dec 9, 2024
1 parent 80b06af commit be00c2f
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 19 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@

## LLMs

| Model | Batch | Hardware | ttft (ms) | t/s/u | Target<br>t/s/u | t/s | TT-Metalium Release | vLLM Tenstorrent Repo Release |
|---------------------------------------------------------------|-------|----------------------------------------------------------|-----------|-------|-----------------|--------|---------------------------------------------------|---------------------------------------------------------------------------------------------------|
| [Falcon 7B (decode only)](./models/demos/ttnn_falcon7b) | 32 | [e150](https://tenstorrent.com/hardware/grayskull) | | 4.2 | 4.4 | 134.4 | | |
| [Falcon 7B](./models/demos/wormhole/falcon7b) | 32 | [n150](https://tenstorrent.com/hardware/wormhole) | 71 | 17.6 | 26 | 563.2 | [v0.53.0-rc44](https://github.com/tenstorrent/tt-metal/tree/v0.53.0-rc44) | |
| [Mistral 7B](./models/demos/wormhole/mistral7b) | 32 | [n150](https://tenstorrent.com/hardware/wormhole) | | 9.9 | 25 | 316.8 | [v0.51.0-rc28](https://github.com/tenstorrent/tt-metal/tree/v0.51.0-rc28) | |
| [Mamba 2.8B](./models/demos/wormhole/mamba) | 32 | [n150](https://tenstorrent.com/hardware/wormhole) | 48 | 12.3 | 41 | 393.6 | [v0.51.0-rc26](https://github.com/tenstorrent/tt-metal/tree/v0.51.0-rc26) | |
Expand Down
4 changes: 2 additions & 2 deletions models/demos/t3000/llama3_70b/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@
## One command run

```bash
chmod +x ./models/demos/t3000/llama3_70b/setup_llama.sh && ./models/demos/t3000/llama3_70b/setup_llama.sh <MODEL_TYPE> <TT_METAL_COMMIT_SHA_OR_TAG> <TT_VLLM_COMMIT_SHA_OR_TAG>
```

Where `MODEL_TYPE` is one of the supported Llama model types, and `TT_METAL_COMMIT_SHA_OR_TAG` and `TT_VLLM_COMMIT_SHA_OR_TAG` are found in the root [README](/README.md#llms) under the "Release" columns, respectively.

Example:

```bash
./models/demos/t3000/llama3_70b/setup_llama.sh llama-3.1-70b-instruct v0.53.0-rc36 384f1790c3be16e1d1b10de07252be2e66d00935
```

Follow the prompts as they come up in the CLI to select the appropriate weights for the chosen model.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,54 @@

set -euo pipefail

# Model information
MODEL=llama-3.1-70b-instruct
META_MODEL_NAME="Meta-Llama-3.1-70B-Instruct"
META_DIR_FILTER="llama3_1"
# Function to display usage information, then exit.
# Arguments:
#   $1 - optional exit status (default 0). Pass a non-zero value when usage
#        is printed because of an argument error, so callers (CI, wrappers)
#        see the failure instead of a spurious success.
usage() {
cat <<EOF
Usage: $0 <MODEL_TYPE> <TT_METAL_COMMIT_SHA_OR_TAG> <TT_VLLM_COMMIT_SHA_OR_TAG>
Description:
This script sets up and deploys the Llama model along with its dependencies.
Arguments:
<MODEL_TYPE> The type of model to deploy. Supported options:
- llama-3.1-70b-instruct
- llama-3.1-70b
- llama-3.1-8b-instruct
- llama-3.1-8b
- llama-3-70b-instruct
- llama-3-70b
- llama-3-8b-instruct
- llama-3-8b
<TT_METAL_COMMIT_SHA_OR_TAG> The commit SHA or tag to use for TT_METAL.
<TT_VLLM_COMMIT_SHA_OR_TAG> The commit SHA or tag to use for vLLM.
Options:
-h, --help Display this help message.
Examples:
# Deploy the llama-3.1-70b-instruct model
$0 llama-3.1-70b-instruct main dev
# Deploy with specific commit SHAs
$0 llama-3.1-70b-instruct v0.53.0-rc36 384f1790c3be16e1d1b10de07252be2e66d00935
EOF
exit "${1:-0}"
}

# Show help when requested. "${1:-}" (not bare "$1") keeps `set -u` from
# aborting with an "unbound variable" error when the script is run with no
# arguments at all.
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  usage
fi

# Positional arguments: $1 = model type (consumed later by
# setup_model_environment), $2/$3 = commit SHA or tag for TT_METAL and vLLM.
TT_METAL_COMMIT_SHA_OR_TAG=${2:-""}
TT_VLLM_COMMIT_SHA_OR_TAG=${3:-""}

# Ensure required arguments are passed.
if [[ -z "${TT_METAL_COMMIT_SHA_OR_TAG}" || -z "${TT_VLLM_COMMIT_SHA_OR_TAG}" ]]; then
  # usage exits 0; print it from a subshell so this error path still exits 1.
  (usage) >&2
  exit 1
fi

# Defined variables
# Print a highlighted progress banner for the given step description.
# Arguments:
#   $1 - human-readable step name.
print_step() {
  echo -e "\n👉 $1...\n"
}

#######################################
# Resolve model-specific settings for the requested model type.
# Arguments:
#   $1 - model type (e.g. llama-3.1-70b-instruct)
# Globals written:
#   MODEL, META_MODEL_NAME, META_DIR_FILTER, REPACKED, REPACKED_STR
# Exits:
#   1 on an unsupported model type (after printing usage).
#######################################
setup_model_environment() {
  print_step "Setting up model environment for $1"
  case "$1" in
    "llama-3.1-70b-instruct")
      MODEL="llama-3.1-70b-instruct"
      META_MODEL_NAME="Meta-Llama-3.1-70B-Instruct"
      META_DIR_FILTER="llama3_1"
      REPACKED=1
      ;;
    "llama-3.1-70b")
      MODEL="llama-3.1-70b"
      META_MODEL_NAME="Meta-Llama-3.1-70B"
      META_DIR_FILTER="llama3_1"
      REPACKED=1
      ;;
    "llama-3.1-8b-instruct")
      MODEL="llama-3.1-8b-instruct"
      META_MODEL_NAME="Meta-Llama-3.1-8B-Instruct"
      META_DIR_FILTER="llama3_1"
      REPACKED=0
      ;;
    "llama-3.1-8b")
      # BUGFIX: was `MODEL_NAME=...`, which left MODEL unset for this choice
      # and broke every later use of ${MODEL}.
      MODEL="llama-3.1-8b"
      META_MODEL_NAME="Meta-Llama-3.1-8B"
      META_DIR_FILTER="llama3_1"
      REPACKED=0
      ;;
    "llama-3-70b-instruct")
      MODEL="llama-3-70b-instruct"
      META_MODEL_NAME="Meta-Llama-3-70B-Instruct"
      META_DIR_FILTER="llama3"
      REPACKED=1
      ;;
    "llama-3-70b")
      MODEL="llama-3-70b"
      META_MODEL_NAME="Meta-Llama-3-70B"
      META_DIR_FILTER="llama3"
      REPACKED=1
      ;;
    "llama-3-8b-instruct")
      MODEL="llama-3-8b-instruct"
      META_MODEL_NAME="Meta-Llama-3-8B-Instruct"
      META_DIR_FILTER="llama3"
      REPACKED=0
      ;;
    "llama-3-8b")
      MODEL="llama-3-8b"
      META_MODEL_NAME="Meta-Llama-3-8B"
      META_DIR_FILTER="llama3"
      REPACKED=0
      ;;
    *)
      echo "⛔ Invalid model choice." >&2
      # BUGFIX: `usage` exits 0, which made the original `exit 1` below it
      # unreachable. Run usage in a subshell so this path still exits 1.
      (usage) >&2
      exit 1
      ;;
  esac

  # REPACKED_STR prefixes the weights/cache directory names (see the
  # LLAMA3_CKPT_DIR / LLAMA3_CACHE_PATH exports) for models whose weights
  # get repacked.
  if [ "${REPACKED}" -eq 1 ]; then
    echo "REPACKED is enabled."
    REPACKED_STR="repacked-"
  else
    echo "REPACKED is disabled."
    REPACKED_STR=""
  fi
}

setup_environment() {
print_step "Setting up environment"
export LLAMA3_CKPT_DIR="${DEFAULT_PERSISTENT_VOLUME_ROOT}/model_weights/repacked-${MODEL}"
export LLAMA3_CKPT_DIR="${DEFAULT_PERSISTENT_VOLUME_ROOT}/model_weights/${REPACKED_STR}${MODEL}"
export LLAMA3_TOKENIZER_PATH="${LLAMA3_CKPT_DIR}/tokenizer.model"
export LLAMA3_CACHE_PATH="${DEFAULT_PERSISTENT_VOLUME_ROOT}/tt_metal_cache/cache_repacked-${MODEL}"
export LLAMA3_CACHE_PATH="${DEFAULT_PERSISTENT_VOLUME_ROOT}/tt_metal_cache/cache_${REPACKED_STR}${MODEL}"
export ARCH_NAME=wormhole_b0
export TT_METAL_HOME=$(pwd)
export PYTHONPATH=$(pwd)
Expand Down Expand Up @@ -100,12 +201,19 @@ setup_weights() {
popd >/dev/null
fi

print_step "Repacking weights"
source python_env/bin/activate
huggingface-cli login
cp "${LLAMA_WEIGHTS_DIR}/tokenizer.model" "${WEIGHTS_DIR}/tokenizer.model"
cp "${LLAMA_WEIGHTS_DIR}/params.json" "${WEIGHTS_DIR}/params.json"
python models/demos/t3000/llama2_70b/scripts/repack_weights.py "${LLAMA_WEIGHTS_DIR}" "${WEIGHTS_DIR}" 5

if [ "${REPACKED}" -eq 1 ]; then
print_step "Repacking weights"
source python_env/bin/activate
cp "${LLAMA_WEIGHTS_DIR}/tokenizer.model" "${WEIGHTS_DIR}/tokenizer.model"
cp "${LLAMA_WEIGHTS_DIR}/params.json" "${WEIGHTS_DIR}/params.json"
python models/demos/t3000/llama2_70b/scripts/repack_weights.py "${LLAMA_WEIGHTS_DIR}" "${WEIGHTS_DIR}" 5
else
cp -rf "${LLAMA_WEIGHTS_DIR}" "${WEIGHTS_DIR}"
fi

echo "🔔 Using weights directory ${WEIGHTS_DIR}"
}

install_vllm() {
Expand All @@ -132,6 +240,8 @@ deploy_server() {
}

# ---- MAIN ----
MODEL_TYPE=$1
setup_model_environment "$MODEL_TYPE"
setup_environment
check_and_build_tt_metal
clone_repo "${DEFAULT_LLAMA_REPO}" "https://github.com/meta-llama/llama-models.git" "685ac4c107c75ce8c291248710bf990a876e1623"
Expand Down

0 comments on commit be00c2f

Please sign in to comment.