Skip to content
This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit

Permalink
Merge branch 'main' into simple-githash-embed
Browse files Browse the repository at this point in the history
  • Loading branch information
dbarbuzzi committed Jun 25, 2024
2 parents cc05a07 + 05c3004 commit 56824a1
Show file tree
Hide file tree
Showing 27 changed files with 192 additions and 465 deletions.
37 changes: 6 additions & 31 deletions .github/actions/nm-build-vllm/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,9 @@ inputs:
venv:
description: 'name for python virtual environment'
required: true
pypi:
description: 'ip address for pypi server'
required: true
outputs:
build_status:
description: "final status from 'pip install -e'"
value: ${{ steps.build.outputs.build_status }}
whl_status:
description: "final status from 'pip3 wheel --no-deps -w dist'"
description: "final status from constructing the whl"
value: ${{ steps.build.outputs.whl_status }}
whl:
description: 'basename for generated whl'
Expand All @@ -39,43 +33,24 @@ runs:
sed -i 's/"__version__",/"__commit__",\n "__version__",/' vllm/__init__.py
source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
# TODO: adjust when we need a proper release. use nightly now.
pip3 install -r requirements-cuda.txt -r requirements-build.txt
# build
SUCCESS=0
pip3 install -e . || SUCCESS=$?
echo "build_status=${SUCCESS}" >> "$GITHUB_OUTPUT"
if [ ${SUCCESS} -ne 0 ]; then
exit 1
fi
# strip binaries
if [ ! $(command -v strip) ]; then
sudo apt install -y binutils
fi
if [ ! $(command -v file) ]; then
sudo apt install -y file
fi
for eachso in $(find . -type f -name '*.so')
do
strip $eachso
file $eachso
done
# whl
SUCCESS=0
pip3 wheel --no-deps -w dist . || SUCCESS=$?
python setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 || SUCCESS=$?
echo "whl_status=${SUCCESS}" >> "$GITHUB_OUTPUT"
BASE=$(./.github/scripts/convert-version ${{ inputs.python }})
ls -alh dist
WHL_FILEPATH=$(find dist -iname "*${BASE}*.whl")
WHL_FILEPATH=$(find dist -type f -iname "*linux_x86_64.whl")
echo "whl: ${WHL_FILEPATH}"
RENAME=$(echo ${WHL_FILEPATH} | sed -e 's/linux_x86_64/manylinux_2_17_x86_64/')
echo "rename: ${RENAME}"
mv ${WHL_FILEPATH} ${RENAME}
WHL=$(basename ${RENAME})
echo "whl=${WHL}" >> "$GITHUB_OUTPUT"
if [ ${SUCCESS} -ne 0 ]; then
exit 1
fi
# sdist
python3 setup.py sdist || SUCCESS=$?
python setup.py sdist || SUCCESS=$?
pyenv uninstall --force ${{ inputs.python}}/envs/${VENV}
ls -alh dist
TAR_FILEPATH=$(find dist -type f -iname "*.tar.gz")
Expand Down
11 changes: 11 additions & 0 deletions .github/actions/nm-caches/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
name: set up caches
description: 'set up HF and Python caches'
runs:
  using: composite
  steps:
    # Create the Hugging Face and pip cache directories and hand ownership to
    # the user running the job. HF_HOME and PIP_CACHE_DIR are read from the
    # environment; they are expected to have been exported by an earlier step
    # of the calling job.
    - run: |
        # Fail early with a clear message instead of a cryptic
        # "mkdir: missing operand" when a variable is unset or empty.
        : "${HF_HOME:?HF_HOME must be set before setting up caches}"
        : "${PIP_CACHE_DIR:?PIP_CACHE_DIR must be set before setting up caches}"
        OWNER="$(whoami)"
        # Quote every expansion so paths containing whitespace or glob
        # characters are passed through as a single argument.
        for CACHE_DIR in "${HF_HOME}" "${PIP_CACHE_DIR}"; do
          sudo mkdir -m 777 -p "${CACHE_DIR}"
          sudo chown -R "${OWNER}:${OWNER}" "${CACHE_DIR}"
        done
      shell: bash
13 changes: 0 additions & 13 deletions .github/actions/nm-hf-cache/action.yml

This file was deleted.

3 changes: 1 addition & 2 deletions .github/actions/nm-install-whl/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ runs:
source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
fi
pip3 install -r requirements-dev.txt
BASE=$(./.github/scripts/convert-version ${{ inputs.python }})
WHL=$(find . -type f -iname "*${BASE}*.whl")
WHL=$(find . -type f -iname "nm_vllm*.whl")
WHL_BASENAME=$(basename ${WHL})
echo "whl=${WHL_BASENAME}" >> "$GITHUB_OUTPUT"
pip3 install ${WHL}[sparse] --extra-index-url https://pypi.neuralmagic.com/simple
Expand Down
2 changes: 2 additions & 0 deletions .github/actions/nm-set-env/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ runs:
# HF Cache
echo "HF_TOKEN=${HF_TOKEN_SECRET}" >> $GITHUB_ENV
echo "HF_HOME=/model-cache" >> $GITHUB_ENV
# Python cache
echo "PIP_CACHE_DIR=/model-cache/python-cache" >> $GITHUB_ENV
# build
NUM_THREADS=$(./.github/scripts/determine-threading -G ${{ inputs.Gi_per_thread }})
echo "MAX_JOBS=${NUM_THREADS}" >> $GITHUB_ENV
Expand Down
4 changes: 0 additions & 4 deletions .github/actions/nm-summary-build/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@ inputs:
python:
description: 'python version info'
required: true
build_status:
description: 'status from build step'
required: true
whl_status:
description: 'status from build step'
required: true
Expand All @@ -37,6 +34,5 @@ runs:
echo "| gitref: | '${{ inputs.gitref }}' |" >> $GITHUB_STEP_SUMMARY
echo "| branch name: | '${{ github.ref_name }}' |" >> $GITHUB_STEP_SUMMARY
echo "| python: | ${{ inputs.python }} |" >> $GITHUB_STEP_SUMMARY
echo "| build: | ${BUILD_EMOJI} |" >> $GITHUB_STEP_SUMMARY
echo "| whl: | ${WHL_EMOJI} |" >> $GITHUB_STEP_SUMMARY
shell: bash
4 changes: 3 additions & 1 deletion .github/actions/nm-test-whl/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@ runs:
steps:
- id: test_whl
run: |
sudo mkdir -m 777 -p /usr/local/apps
sudo chown -R $(whoami):$(whoami) /usr/local/apps
pip install coverage
pip install pytest-cov
pip install pytest-xdist
pip install -r requirements-dev.txt
SUCCESS=0
VLLM_SRC=$(python3 -c "import vllm; print(vllm.__path__[0])")
./.github/scripts/run-tests -s ${VLLM_SRC} -t ${{ inputs.test_directory }} -r ${{ inputs.test_results }} -f ${{ inputs.test_skip_list }}|| SUCCESS=$?
./.github/scripts/run-tests -s ${VLLM_SRC} -t ${{ inputs.test_directory }} -r ${{ inputs.test_results }} || SUCCESS=$?
pytest ./neuralmagic/tests/test_nm-vllm_licenses.py --junitxml=${{ inputs.test_results }}/test_nm-vllm_licenses.xml
echo "status=${SUCCESS}" >> "$GITHUB_OUTPUT"
exit ${SUCCESS}
Expand Down
34 changes: 0 additions & 34 deletions .github/scripts/run-tests
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ usage() {
echo " -s - src directory, i.e. location of package *.py files."
echo " -t - test directory, i.e. location of *.py test files. (default 'tests/')"
echo " -r - desired results base directory. xml results will mirror provided tests directory structure. (default 'test-results/')"
echo " -f - file with test skip list, e.g. ' neuralmagic/tests/skip-for-remote-push.txt'. (default is to run all found tests)"
echo " -h - this list of options"
echo
echo "note: all paths are relative to 'nm-vllm' root"
Expand All @@ -35,9 +34,6 @@ while getopts "hs:t:r:f:" OPT; do
r)
RESULTS_DIR="${OPTARG}"
;;
f)
SKIP_LIST="${OPTARG}"
;;
esac
done

Expand Down Expand Up @@ -71,36 +67,6 @@ for FOUND in "${TESTS_FOUND[@]}"; do
echo "${FOUND}"
done

# build the skip list from provided file
declare -a TESTS_TO_EXCLUDE
if [ -f "${SKIP_LIST}" ]; then
while IFS= read -r line
do
TESTS_TO_EXCLUDE+=("${line}")
done < "${SKIP_LIST}"
fi

echo "..."
for EXCLUDE in "${TESTS_TO_EXCLUDE[@]}"; do
for JJ in "${!TESTS_FOUND[@]}"; do
if [[ ${TESTS_FOUND[$JJ]} = ${EXCLUDE} ]]; then
echo "excluding: ${EXCLUDE}"
unset 'TESTS_FOUND[$JJ]'
fi
done
done

echo "..."
echo "planning to run:"
for TEST in "${TESTS_FOUND[@]}"
do
echo "${TEST}"
done
echo "..."

# download required artifacts for testing
# (cd ${TEST_DIR} && sudo bash ../.buildkite/download-images.sh)

# run selected tests
SUCCESS=0
CC_PYTEST_FLAGS="--cov=${SRC_DIR} --cov=${TEST_DIR} --cov-report=html:cc-vllm-html --cov-append"
Expand Down
17 changes: 6 additions & 11 deletions .github/workflows/nm-benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ on:
required: true
push_benchmark_results_to_gh_pages:
description: "When set to true, the workflow pushes all benchmarking results to gh-pages UI"
type: string
type: boolean
required: true

# makes workflow manually callable
Expand Down Expand Up @@ -61,11 +61,8 @@ on:
required: true
push_benchmark_results_to_gh_pages:
description: "When set to true, the workflow pushes all benchmarking results to gh-pages UI"
type: choice
options:
- 'true'
- 'false'
default: 'false'
type: boolean
default: false

env:
BENCHMARK_RESULTS: /model-cache/benchmark_results
Expand Down Expand Up @@ -109,11 +106,9 @@ jobs:
Gi_per_thread: 1
nvcc_threads: 0

- name: hf cache
id: hf_cache
uses: ./.github/actions/nm-hf-cache/
with:
fs_cache: ${{ secrets.HF_FS_CACHE }}
- name: caches
id: caches
uses: ./.github/actions/nm-caches/

- name: download whl
id: download
Expand Down
Loading

1 comment on commit 56824a1

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bigger_is_better

Benchmark suite Current: 56824a1 Previous: 9b2e107 Ratio
{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.1", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 2.519916640633788 prompts/s
{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.1", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 967.6479900033747 tokens/s

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.