.github/workflows/integration.yml

name: Integration tests

on:
  workflow_dispatch:
    inputs:
      djl-version:
        description: 'The released version of DJL'
        required: false
        default: ''
  schedule:
    - cron: '0 15 * * *'


jobs:
  create-runners:
    runs-on: [self-hosted, scheduler]
    steps:
      - name: Create new G6 instance
        id: create_gpu
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq '.token' | tr -d '"' )
          ./start_instance.sh action_g6 $token djl-serving
      - name: Create new G6 instance
        id: create_gpu2
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq '.token' | tr -d '"' )
          ./start_instance.sh action_g6 $token djl-serving
      - name: Create new G6 instance
        id: create_gpu3
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq '.token' | tr -d '"' )
          ./start_instance.sh action_g6 $token djl-serving
      - name: Create new G6 instance
        id: create_gpu4
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq '.token' | tr -d '"' )
          ./start_instance.sh action_g6 $token djl-serving
      - name: Create new Graviton instance
        id: create_aarch64
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq '.token' | tr -d '"' )
          ./start_instance.sh action_graviton $token djl-serving
      - name: Create new Inf2.24xl instance
        id: create_inf2
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq '.token' | tr -d '"' )
          ./start_instance.sh action_inf2 $token djl-serving
      - name: Create new Inf2.24xl instance
        id: create_inf2_2
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq '.token' | tr -d '"' )
          ./start_instance.sh action_inf2 $token djl-serving
    outputs:
      gpu_instance_id_1: ${{ steps.create_gpu.outputs.action_g6_instance_id }}
      gpu_instance_id_2: ${{ steps.create_gpu2.outputs.action_g6_instance_id }}
      gpu_instance_id_3: ${{ steps.create_gpu3.outputs.action_g6_instance_id }}
      gpu_instance_id_4: ${{ steps.create_gpu4.outputs.action_g6_instance_id }}
      aarch64_instance_id: ${{ steps.create_aarch64.outputs.action_graviton_instance_id }}
      inf2_instance_id_1: ${{ steps.create_inf2.outputs.action_inf2_instance_id }}
      inf2_instance_id_2: ${{ steps.create_inf2_2.outputs.action_inf2_instance_id }}


  test:
    runs-on:
      - ${{ matrix.test.gh-runner && matrix.test.instance || 'self-hosted' }}
      - ${{ matrix.test.gh-runner && matrix.test.instance || format('RUN_ID-{0}', github.run_id) }}
      - ${{ matrix.test.gh-runner && matrix.test.instance || format('RUN_NUMBER-{0}', github.run_number) }}
      - ${{ matrix.test.gh-runner && matrix.test.instance || format('SHA-{0}', github.sha) }}
      - ${{ matrix.test.instance }}
    timeout-minutes: 90
    needs: create-runners
    strategy:
      fail-fast: false
      matrix:
        test:
          - test: TestCpuFull
            instance: ubuntu-latest
            gh-runner: true
          - test: TestCpuBoth
            instance: ubuntu-latest
            gh-runner: true
          - test: TestGpu
            instance: g6
          - test: TestAarch64
            instance: aarch64
          - test: TestHfHandler
            instance: g6
          - test: TestTrtLlmHandler1
            instance: g6
          - test: TestTrtLlmHandler2
            instance: g6
          - test: TestSchedulerSingleGPU
            instance: g6
          - test: TestSchedulerMultiGPU
            instance: g6
          - test: TestLmiDist1
            instance: g6
          - test: TestLmiDist2
            instance: g6
          - test: TestVllm1
            instance: g6
          - test: TestVllmLora
            instance: g6
          - test: TestLmiDistLora
            instance: g6
          - test: TestNeuronx1
            instance: inf2
          - test: TestNeuronx2
            instance: inf2
          - test: TestNeuronxRollingBatch
            instance: inf2
          - test: TestMultiModal
            instance: g6
          - test: TestTextEmbedding
            instance: g6
          - test: TestLmiDistPipelineParallel
            instance: g6
    steps:
      - uses: actions/checkout@v4
      - name: Clean env
        run: |
          yes | docker system prune -a --volumes
          sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
          echo "wait dpkg lock..."
          while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
      - name: Set up Python3
        if: ${{ matrix.test.instance != 'aarch64' }}
        uses: actions/setup-python@v5
        with:
          python-version: '3.10.x'
      - name: Set up Python3 (aarch64)
        if: ${{ matrix.test.instance == 'aarch64' }}
        run: |
          # Using an alternate installation because of an incompatible combination
          # of aarch64 with ubuntu-20.04 not supported by the actions/setup-python
          sudo apt-get install python3 python-is-python3 python3-pip -y
      - name: Install pip dependencies
        run: pip3 install pytest requests "numpy<2" pillow huggingface_hub
      - name: Install torch
        # Use torch to get cuda capability of current device to selectively run tests
        # Torch version doesn't really matter that much
        run: |
          pip3 install torch==2.3.0
      - name: Install awscurl
        working-directory: tests/integration
        run: |
          wget https://publish.djl.ai/awscurl/awscurl
          chmod +x awscurl
          mkdir outputs
      - name: Test
        working-directory: tests/integration
        env:
          TEST_DJL_VERSION: ${{ inputs.djl-version }}
        run: |
          python -m pytest -k ${{ matrix.test.test }} tests.py
      - name: Cleanup
        working-directory: tests/integration
        run: |
          rm -rf outputs
          rm awscurl
      - name: On Failure
        if: ${{ failure() }}
        working-directory: tests/integration
        run: |
          for file in outputs/*; do if [ -f "$file" ]; then echo "Contents of $file:"; cat "$file"; echo; fi; done
          sudo rm -rf outputs && sudo rm -rf models
          rm awscurl
          ./remove_container.sh
      - name: Upload test logs
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: test-${{ matrix.test.test }}-logs
          path: tests/integration/all_logs/

  transformers-neuronx-container-unit-tests:
    runs-on:
      - self-hosted
      - inf2
      - RUN_ID-${{ github.run_id }}
      - RUN_NUMBER-${{ github.run_number }}
      - SHA-${{ github.sha }}
    timeout-minutes: 15
    needs: create-runners
    steps:
      - uses: actions/checkout@v4
      - name: Clean env
        run: |
          yes | docker system prune -a --volumes
          sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
          echo "wait dpkg lock..."
          while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
      - name: Set up Python3
        uses: actions/setup-python@v5
        with:
          python-version: '3.10.x'
      - name: Install pip dependencies
        run: pip3 install requests numpy pillow wheel
      - name: Build container name
        run: ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${{ github.event.inputs.djl-version }}
      - name: Download models and dockers
        run: |
          docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
      - name: Run djl_python unit/integration tests on container
        working-directory: engines/python/setup
        run: |
          # Setup
          pip install setuptools
          python3 -m setup bdist_wheel
          mkdir logs
          docker run -t --rm --network="host" \
          --name neuron-test \
          -v $PWD/:/opt/ml/model/ \
          -w /opt/ml/model \
          --device=/dev/neuron0:/dev/neuron0 \
          deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG \
          /bin/bash -c "'pip install /opt/ml/model/dist/*.whl pytest' && \
          pytest djl_python/tests/neuron_test_scripts/ | tee logs/results.log"
          
          # Cleanup
          sudo rm -rf TinyLlama .pytest_cache djl_python
          
          # Fail on failed tests
          if grep -F "failed" logs/results.log &>/dev/null; then exit 1; fi
      - name: On fail step
        if: ${{ failure() }}
        working-directory: engines/python/setup
        run: |
          cat logs/results.log
      - name: Upload test logs
        uses: actions/upload-artifact@v4
        with:
          name: transformers-neuronx-${{ matrix.arch }}-logs
          path: engines/python/setup/logs/

  stop-runners:
    if: always()
    runs-on: [ self-hosted, scheduler ]
    needs: [ create-runners, test, transformers-neuronx-container-unit-tests]
    steps:
      - name: Stop all instances
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          instance_id=${{ needs.create-runners.outputs.gpu_instance_id_1 }}
          ./stop_instance.sh $instance_id
          instance_id=${{ needs.create-runners.outputs.gpu_instance_id_2 }}
          ./stop_instance.sh $instance_id
          instance_id=${{ needs.create-runners.outputs.gpu_instance_id_3 }}
          ./stop_instance.sh $instance_id
          instance_id=${{ needs.create-runners.outputs.gpu_instance_id_4 }}
          ./stop_instance.sh $instance_id
          instance_id=${{ needs.create-runners.outputs.aarch64_instance_id }}
          ./stop_instance.sh $instance_id
          instance_id=${{ needs.create-runners.outputs.inf2_instance_id_1 }}
          ./stop_instance.sh $instance_id
          instance_id=${{ needs.create-runners.outputs.inf2_instance_id_2 }}
          ./stop_instance.sh $instance_id