Merge branch 'main' into simple-githash-embed

neuralmagic · Jun 25, 2024 · 56824a1 · 56824a1 · github-actions · Jun 25, 2024
2 parents cc05a07 + 05c3004
commit 56824a1
Show file tree

Hide file tree

Showing 27 changed files with 192 additions and 465 deletions.
diff --git a/.github/actions/nm-build-vllm/action.yml b/.github/actions/nm-build-vllm/action.yml
@@ -7,15 +7,9 @@ inputs:
   venv:
     description: 'name for python virtual environment'
     required: true
-  pypi:
-    description: 'ip address for pypi server'
-    required: true
 outputs:
-  build_status:
-    description: "final status from 'pip install -e'"
-    value: ${{ steps.build.outputs.build_status }}
   whl_status:
-    description: "final status from 'pip3 wheel --no-deps -w dist'"
+    description: "final status from constructing the whl"
     value: ${{ steps.build.outputs.whl_status }}
   whl:
     description: 'basename for generated whl'
@@ -39,43 +33,24 @@ runs:
       sed -i 's/"__version__",/"__commit__",\n    "__version__",/' vllm/__init__.py
 
       source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
-      # TODO: adjust when we need a proper release. use nightly now.
       pip3 install -r requirements-cuda.txt -r requirements-build.txt
-      # build
-      SUCCESS=0
-      pip3 install -e . || SUCCESS=$?
-      echo "build_status=${SUCCESS}" >> "$GITHUB_OUTPUT"
-      if [ ${SUCCESS} -ne 0 ]; then
-        exit 1
-      fi
-      # strip binaries
-      if [ ! $(command -v strip) ]; then
-          sudo apt install -y binutils
-      fi
-      if [ ! $(command -v file) ]; then
-          sudo apt install -y file
-      fi
-      for eachso in $(find . -type f -name '*.so')
-      do
-          strip $eachso
-          file $eachso
-      done
       # whl
       SUCCESS=0
-      pip3 wheel --no-deps -w dist . || SUCCESS=$?
+      python setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 || SUCCESS=$?
       echo "whl_status=${SUCCESS}" >> "$GITHUB_OUTPUT"
-      BASE=$(./.github/scripts/convert-version ${{ inputs.python }})
       ls -alh dist
-      WHL_FILEPATH=$(find dist -iname "*${BASE}*.whl")
+      WHL_FILEPATH=$(find dist -type f -iname "*linux_x86_64.whl")
+      echo "whl: ${WHL_FILEPATH}"
       RENAME=$(echo ${WHL_FILEPATH} | sed -e 's/linux_x86_64/manylinux_2_17_x86_64/')
+      echo "rename: ${RENAME}"
       mv ${WHL_FILEPATH} ${RENAME}
       WHL=$(basename ${RENAME})
       echo "whl=${WHL}" >> "$GITHUB_OUTPUT"
       if [ ${SUCCESS} -ne 0 ]; then
         exit 1
       fi
       # sdist
-      python3 setup.py sdist || SUCCESS=$?
+      python setup.py sdist || SUCCESS=$?
       pyenv uninstall --force ${{ inputs.python}}/envs/${VENV}
       ls -alh dist
       TAR_FILEPATH=$(find dist -type f -iname "*.tar.gz")

diff --git a/.github/actions/nm-caches/action.yml b/.github/actions/nm-caches/action.yml
@@ -0,0 +1,11 @@
+name: set up caches
+description: 'set up HF and Python caches'
+runs:
+  using: composite
+  steps:
+  - run: |
+      sudo mkdir -m 777 -p ${HF_HOME}
+      sudo chown -R $(whoami):$(whoami) ${HF_HOME}
+      sudo mkdir -m 777 -p ${PIP_CACHE_DIR}
+      sudo chown -R $(whoami):$(whoami) ${PIP_CACHE_DIR}
+    shell: bash
diff --git a/.github/actions/nm-hf-cache/action.yml b/.github/actions/nm-hf-cache/action.yml
diff --git a/.github/actions/nm-install-whl/action.yml b/.github/actions/nm-install-whl/action.yml
@@ -22,8 +22,7 @@ runs:
           source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
         fi
         pip3 install -r requirements-dev.txt
-        BASE=$(./.github/scripts/convert-version ${{ inputs.python }})
-        WHL=$(find . -type f -iname "*${BASE}*.whl")
+        WHL=$(find . -type f -iname "nm_vllm*.whl")
         WHL_BASENAME=$(basename ${WHL})
         echo "whl=${WHL_BASENAME}" >> "$GITHUB_OUTPUT"
         pip3 install ${WHL}[sparse] --extra-index-url https://pypi.neuralmagic.com/simple

diff --git a/.github/actions/nm-set-env/action.yml b/.github/actions/nm-set-env/action.yml
@@ -28,6 +28,8 @@ runs:
       # HF Cache
       echo "HF_TOKEN=${HF_TOKEN_SECRET}" >> $GITHUB_ENV
       echo "HF_HOME=/model-cache" >> $GITHUB_ENV
+      # Python cache
+      echo "PIP_CACHE_DIR=/model-cache/python-cache" >> $GITHUB_ENV
       # build
       NUM_THREADS=$(./.github/scripts/determine-threading -G ${{ inputs.Gi_per_thread }})
       echo "MAX_JOBS=${NUM_THREADS}" >> $GITHUB_ENV

diff --git a/.github/actions/nm-summary-build/action.yml b/.github/actions/nm-summary-build/action.yml
@@ -13,9 +13,6 @@ inputs:
   python:
     description: 'python version info'
     required: true
-  build_status:
-    description: 'status from build step'
-    required: true
   whl_status:
     description: 'status from build step'
     required: true
@@ -37,6 +34,5 @@ runs:
       echo "| gitref: | '${{ inputs.gitref }}' |" >> $GITHUB_STEP_SUMMARY
       echo "| branch name: | '${{ github.ref_name }}' |" >> $GITHUB_STEP_SUMMARY
       echo "| python: | ${{ inputs.python }} |" >> $GITHUB_STEP_SUMMARY
-      echo "| build: | ${BUILD_EMOJI} |" >> $GITHUB_STEP_SUMMARY
       echo "| whl: | ${WHL_EMOJI} |" >> $GITHUB_STEP_SUMMARY
     shell: bash
diff --git a/.github/actions/nm-test-whl/action.yml b/.github/actions/nm-test-whl/action.yml
@@ -19,13 +19,15 @@ runs:
   steps:
     - id: test_whl
       run: |
+        sudo mkdir -m 777 -p /usr/local/apps
+        sudo chown -R $(whoami):$(whoami) /usr/local/apps
         pip install coverage
         pip install pytest-cov
         pip install pytest-xdist
         pip install -r requirements-dev.txt
         SUCCESS=0
         VLLM_SRC=$(python3 -c "import vllm; print(vllm.__path__[0])")
-        ./.github/scripts/run-tests -s ${VLLM_SRC} -t ${{ inputs.test_directory }} -r ${{ inputs.test_results }} -f ${{ inputs.test_skip_list }}|| SUCCESS=$?
+        ./.github/scripts/run-tests -s ${VLLM_SRC} -t ${{ inputs.test_directory }} -r ${{ inputs.test_results }} || SUCCESS=$?
         pytest ./neuralmagic/tests/test_nm-vllm_licenses.py --junitxml=${{ inputs.test_results }}/test_nm-vllm_licenses.xml
         echo "status=${SUCCESS}" >> "$GITHUB_OUTPUT"
         exit ${SUCCESS}

diff --git a/.github/scripts/run-tests b/.github/scripts/run-tests
@@ -8,7 +8,6 @@ usage() {
     echo "  -s    - src directory, i.e. location of package *.py files."
     echo "  -t    - test directory, i.e. location of *.py test files. (default 'tests/')"
     echo "  -r    - desired results base directory. xml results will mirror provided tests directory structure. (default 'test-results/')"
-    echo "  -f    - file with test skip list, e.g. ' neuralmagic/tests/skip-for-remote-push.txt'. (default is to run all found tests)"
     echo "  -h    - this list of options"
     echo
     echo "note: all paths are relative to 'nm-vllm' root"
@@ -35,9 +34,6 @@ while getopts "hs:t:r:f:" OPT; do
     r)
         RESULTS_DIR="${OPTARG}"
         ;;
-    f)
-        SKIP_LIST="${OPTARG}"
-        ;;
     esac
 done
 
@@ -71,36 +67,6 @@ for FOUND in "${TESTS_FOUND[@]}"; do
     echo "${FOUND}"
 done
 
-# build the skip list from provided file
-declare -a TESTS_TO_EXCLUDE
-if [ -f "${SKIP_LIST}" ]; then
-    while IFS= read -r line
-    do
-        TESTS_TO_EXCLUDE+=("${line}")
-    done < "${SKIP_LIST}"
-fi
-
-echo "..."
-for EXCLUDE in "${TESTS_TO_EXCLUDE[@]}"; do
-    for JJ in "${!TESTS_FOUND[@]}"; do
-        if [[ ${TESTS_FOUND[$JJ]} = ${EXCLUDE} ]]; then
-            echo "excluding: ${EXCLUDE}"
-            unset 'TESTS_FOUND[$JJ]'
-        fi
-    done
-done
-
-echo "..."
-echo "planning to run:"
-for TEST in "${TESTS_FOUND[@]}"
-do
-    echo "${TEST}"
-done
-echo "..."
-
-# download required artifacts for testing
-# (cd ${TEST_DIR} && sudo bash ../.buildkite/download-images.sh)
-
 # run selected tests
 SUCCESS=0
 CC_PYTEST_FLAGS="--cov=${SRC_DIR} --cov=${TEST_DIR} --cov-report=html:cc-vllm-html --cov-append"

diff --git a/.github/workflows/nm-benchmark.yml b/.github/workflows/nm-benchmark.yml
@@ -29,7 +29,7 @@ on:
         required: true
       push_benchmark_results_to_gh_pages:
         description: "When set to true, the workflow pushes all benchmarking results to gh-pages UI"
-        type: string
+        type: boolean
         required: true
 
   # makes workflow manually callable
@@ -61,11 +61,8 @@ on:
         required: true
       push_benchmark_results_to_gh_pages:
         description: "When set to true, the workflow pushes all benchmarking results to gh-pages UI"
-        type: choice
-        options:
-          - 'true'
-          - 'false'
-        default: 'false'
+        type: boolean
+        default: false
 
 env:
     BENCHMARK_RESULTS: /model-cache/benchmark_results
@@ -109,11 +106,9 @@ jobs:
           Gi_per_thread: 1
           nvcc_threads: 0
 
-      - name: hf cache
-        id: hf_cache
-        uses: ./.github/actions/nm-hf-cache/
-        with:
-          fs_cache: ${{ secrets.HF_FS_CACHE }}
+      - name: caches
+        id: caches
+        uses: ./.github/actions/nm-caches/
 
       - name: download whl
         id: download
Benchmark suite	Current: `56824a1`	Previous: `9b2e107`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.1", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`2.519916640633788` prompts/s
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.1", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`967.6479900033747` tokens/s