Large model integration tests with P4D and compiler optimizations

Large model integration tests with P4D and compiler optimizations #113

Workflow file for this run

.github/workflows/llm_integration_p4d.yml at cce574e

	name: Large model integration tests with P4D and compiler optimizations

	# Triggers: manual dispatch (with optional inputs) and a daily schedule.
	on:
	  workflow_dispatch:
	    inputs:
	      # Passed as the second argument to docker_name_builder.sh by the test jobs.
	      djl-version:
	        description: 'The released version of DJL'
	        required: false
	        default: ''
	      # Leave empty to run everything; 'aiccl' is the only named selection.
	      run_test:
	        description: 'Run only the tests you need [aiccl]'
	        required: false
	        default: ''
	  schedule:
	    # Every day at 15:00; GitHub evaluates schedule cron expressions in UTC.
	    - cron: '0 15 * * *'


	jobs:
	  # Requests a runner registration token from the GitHub API and hands it to
	  # start_instance.sh. The step output action_lmic_p4d_instance_id (presumably
	  # written by that script - confirm) is re-exported as p4d_instance_id so that
	  # stop-runners-p4d can stop the instance afterwards.
	  create-runners-p4d:
	    runs-on: [self-hosted, scheduler]
	    steps:
	      - name: Create new P4d.24xl instance
	        id: create_gpu_p4d
	        run: |
	          cd /home/ubuntu/djl_benchmark_script/scripts
	          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
	            https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
	            --fail \
	            | jq '.token' | tr -d '"' )
	          ./start_instance.sh action_lmic_p4d $token djl-serving
	    outputs:
	      p4d_instance_id: ${{ steps.create_gpu_p4d.outputs.action_lmic_p4d_instance_id }}

	  # lmi_dist tests with the aiccl backend. Runs when the run_test input is
	  # empty (which includes scheduled runs, where no inputs exist) or 'aiccl'.
	  lmi-dist-aiccl-test:
	    if: contains(fromJson('["", "aiccl"]'), github.event.inputs.run_test)
	    runs-on: [self-hosted, p4d]
	    timeout-minutes: 120
	    needs: create-runners-p4d
	    steps:
	      - uses: actions/checkout@v4
	      - name: Clean env
	        run: |
	          yes | docker system prune -a --volumes
	          sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
	          echo "wait dpkg lock..."
	          while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
	      - name: Set up Python3
	        uses: actions/setup-python@v5
	        with:
	          python-version: '3.10.x'
	      - name: Install pip dependencies
	        run: pip3 install requests numpy
	      # The later steps read $DJLSERVING_DOCKER_TAG; presumably this script
	      # exports it to the job environment - confirm.
	      - name: Build container name
	        run: ./serving/docker/scripts/docker_name_builder.sh deepspeed ${{ github.event.inputs.djl-version }}
	      - name: Download models and dockers
	        working-directory: tests/integration
	        run: |
	          docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
	      # Each model step below follows the same sequence: prepare the model
	      # config, launch the serving container, run the client, count the
	      # 'Starting torch distributed with aiccl backend' log lines, then remove
	      # all containers.
	      # NOTE(review): a count below 8 is only echoed; the step still succeeds.
	      # Confirm whether a missing aiccl backend should fail the job.
	      - name: Test Mixtral-8x7B with aiccl backend
	        working-directory: tests/integration
	        run: |
	          rm -rf models
	          python3 llm/prepare.py lmi_dist_aiccl mixtral-8x7b-aiccl
	          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
	            serve
	          python3 llm/client.py lmi_dist_aiccl mixtral-8x7b-aiccl
	          if [ "$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')" -lt 8 ]; then
	            echo "aiccl backend not used"
	          else
	            echo "Using aiccl backend"
	          fi
	          docker rm -f $(docker ps -aq)
	      - name: Test Llama-2-70B with aiccl backend
	        working-directory: tests/integration
	        run: |
	          rm -rf models
	          python3 llm/prepare.py lmi_dist_aiccl llama-2-70b-aiccl
	          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
	            serve
	          python3 llm/client.py lmi_dist_aiccl llama-2-70b-aiccl
	          if [ "$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')" -lt 8 ]; then
	            echo "aiccl backend not used"
	          else
	            echo "Using aiccl backend"
	          fi
	          docker rm -f $(docker ps -aq)
	      - name: Test codellama/CodeLlama-34b-hf with aiccl backend
	        working-directory: tests/integration
	        run: |
	          rm -rf models
	          python3 llm/prepare.py lmi_dist_aiccl codellama-34b-aiccl
	          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
	            serve
	          python3 llm/client.py lmi_dist_aiccl codellama-34b-aiccl
	          if [ "$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')" -lt 8 ]; then
	            echo "aiccl backend not used"
	          else
	            echo "Using aiccl backend"
	          fi
	          docker rm -f $(docker ps -aq)
	      - name: Test tiiuae/falcon-40b with aiccl backend
	        working-directory: tests/integration
	        run: |
	          rm -rf models
	          python3 llm/prepare.py lmi_dist_aiccl falcon-40b-aiccl
	          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
	            serve
	          python3 llm/client.py lmi_dist_aiccl falcon-40b-aiccl
	          if [ "$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')" -lt 8 ]; then
	            echo "aiccl backend not used"
	          else
	            echo "Using aiccl backend"
	          fi
	          docker rm -f $(docker ps -aq)
	      - name: Remove models dir
	        working-directory: tests/integration
	        run: |
	          sudo rm -rf models
	      # Failure-only cleanup: the model steps remove their containers last, so
	      # a failed step can leave containers and the models directory behind.
	      - name: On fail step
	        if: ${{ failure() }}
	        working-directory: tests/integration
	        run: |
	          sudo rm -rf models
	          docker rm -f $(docker ps -aq) || true
	          cat logs/serving.log
	      # always(): without it this step is skipped whenever a test step fails,
	      # so failed runs would publish no logs.
	      - name: Upload test logs
	        if: always()
	        uses: actions/upload-artifact@v4
	        with:
	          name: lmi-dist-aiccl-logs
	          path: tests/integration/logs/

	  # TensorRT-LLM tests. Not gated on the run_test input.
	  trtllm-test:
	    runs-on: [self-hosted, p4d]
	    timeout-minutes: 120
	    needs: create-runners-p4d
	    steps:
	      - uses: actions/checkout@v4
	      - name: Clean env
	        run: |
	          yes | docker system prune -a --volumes
	          sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
	          echo "wait dpkg lock..."
	          while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
	      - name: Set up Python3
	        uses: actions/setup-python@v5
	        with:
	          python-version: '3.10.x'
	      - name: Install pip dependencies
	        run: pip3 install requests numpy
	      - name: Build container name
	        run: ./serving/docker/scripts/docker_name_builder.sh tensorrt-llm ${{ github.event.inputs.djl-version }}
	      - name: Download models and dockers
	        working-directory: tests/integration
	        run: |
	          docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
	      - name: Test llama-2-70B with TP8
	        working-directory: tests/integration
	        run: |
	          rm -rf models
	          python3 llm/prepare.py trtllm llama2-70b
	          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
	            serve
	          python3 llm/client.py trtllm llama2-70b
	          docker rm -f $(docker ps -aq)
	      - name: Test mixtral-8x7b with with TP8
	        working-directory: tests/integration
	        run: |
	          rm -rf models
	          python3 llm/prepare.py trtllm mixtral-8x7b
	          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
	            serve
	          python3 llm/client.py trtllm mixtral-8x7b
	          docker rm -f $(docker ps -aq)
	      - name: Remove models dir
	        working-directory: tests/integration
	        run: |
	          sudo rm -rf models
	      - name: On fail step
	        if: ${{ failure() }}
	        working-directory: tests/integration
	        run: |
	          sudo rm -rf models
	          docker rm -f $(docker ps -aq) || true
	          cat logs/serving.log
	      # always(): publish logs for failed runs too.
	      - name: Upload test logs
	        if: always()
	        uses: actions/upload-artifact@v4
	        with:
	          name: trtllm-logs
	          path: tests/integration/logs/

	  # vLLM tests, built and launched with the deepspeed container flavour.
	  # Not gated on the run_test input.
	  vllm-test:
	    runs-on: [self-hosted, p4d]
	    timeout-minutes: 120
	    needs: create-runners-p4d
	    steps:
	      - uses: actions/checkout@v4
	      - name: Clean env
	        run: |
	          yes | docker system prune -a --volumes
	          sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
	          echo "wait dpkg lock..."
	          while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
	      - name: Set up Python3
	        uses: actions/setup-python@v5
	        with:
	          python-version: '3.10.x'
	      - name: Install pip dependencies
	        run: pip3 install requests numpy
	      - name: Build container name
	        run: ./serving/docker/scripts/docker_name_builder.sh deepspeed ${{ github.event.inputs.djl-version }}
	      - name: Download models and dockers
	        working-directory: tests/integration
	        run: |
	          docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
	      - name: Test llama-2-70B with TP8
	        working-directory: tests/integration
	        run: |
	          rm -rf models
	          python3 llm/prepare.py vllm llama2-70b
	          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
	            serve
	          python3 llm/client.py vllm llama2-70b
	          docker rm -f $(docker ps -aq)
	      - name: Test mixtral-8x7b with with TP8
	        working-directory: tests/integration
	        run: |
	          rm -rf models
	          python3 llm/prepare.py vllm mixtral-8x7b
	          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
	            serve
	          python3 llm/client.py vllm mixtral-8x7b
	          docker rm -f $(docker ps -aq)
	      - name: Remove models dir
	        working-directory: tests/integration
	        run: |
	          sudo rm -rf models
	      - name: On fail step
	        if: ${{ failure() }}
	        working-directory: tests/integration
	        run: |
	          sudo rm -rf models
	          docker rm -f $(docker ps -aq) || true
	          cat logs/serving.log
	      # always(): publish logs for failed runs too.
	      # The artifact name must differ from trtllm-test's 'trtllm-logs':
	      # upload-artifact v4 rejects a second upload under an existing name.
	      - name: Upload test logs
	        if: always()
	        uses: actions/upload-artifact@v4
	        with:
	          name: vllm-logs
	          path: tests/integration/logs/

	  # Stops the instance created by create-runners-p4d. always() makes this run
	  # even when a test job failed or was skipped, so the instance is not left up.
	  stop-runners-p4d:
	    if: always()
	    runs-on: [self-hosted, scheduler]
	    needs: [create-runners-p4d, lmi-dist-aiccl-test, trtllm-test, vllm-test]
	    steps:
	      - name: Stop all instances
	        run: |
	          cd /home/ubuntu/djl_benchmark_script/scripts
	          instance_id=${{ needs.create-runners-p4d.outputs.p4d_instance_id }}
	          ./stop_instance.sh $instance_id

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Large model integration tests with P4D and compiler optimizations #113

Workflow file

Large model integration tests with P4D and compiler optimizations #113

Jobs

Run details

Workflow file for this run