# Workflow: "(T3K) T3000 model perf tests" (tracked as #321)
name: "(T3K) T3000 model perf tests"
on:
  # Allow manual runs from the Actions tab.
  workflow_dispatch:
  schedule:
    - cron: "0 */12 * * *" # Runs the workflow every 12 hours
jobs:
  # Build the wormhole_b0 artifact that every matrix entry downloads below.
  build-artifact:
    uses: ./.github/workflows/build-artifact.yaml
    with:
      arch: '["wormhole_b0"]'
    secrets: inherit
  t3000-model-perf-tests:
    needs: build-artifact
    strategy:
      # Run every model's perf test even if one of them fails.
      fail-fast: false
      matrix:
        # Each entry: display name, model id (used in the artifact name), model
        # type, arch, shell function to invoke, per-test timeout (minutes),
        # runner labels, and the Slack user/group to notify on failure.
        test-group: [
          { name: "t3k LLM falcon7b model perf tests", model: "falcon7b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon7b_tests, timeout: 60,
            runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "runner-test", "bare-metal", "pipeline-perf", "runs-llm-falcon"], owner_id: S07AJBTLX2L}, # Model Falcon
          { name: "t3k LLM mixtral model perf tests", model: "mixtral", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 60,
            runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "runner-test", "bare-metal", "pipeline-perf"], owner_id: U03PUAKE719}, # Miguel Tairum
          { name: "t3k LLM llama2 model perf tests", model: "llama2", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 60,
            runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "runner-test", "bare-metal", "pipeline-perf"], owner_id: U03FJB5TM5Y}, # Colman Glagovich
          { name: "t3k LLM falcon40b model perf tests", model: "falcon40b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 60,
            runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "runner-test", "bare-metal", "pipeline-perf", "runs-llm-falcon"], owner_id: S07AJBTLX2L}, # Model Falcon
          # { name: "t3k CNN model perf tests", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_cnn_tests, timeout: 120, owner_id: }, # No tests are being run?
        ]
    name: ${{ matrix.test-group.name }}
    env:
      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
      ARCH_NAME: ${{ matrix.test-group.arch }}
      LOGURU_LEVEL: INFO
      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
    environment: dev
    runs-on: ${{ matrix.test-group.runs-on }}
    steps:
      - uses: tenstorrent-metal/metal-workflows/.github/actions/[email protected]
      - name: Enable performance mode
        run: |
          sudo cpupower frequency-set -g performance
      - name: Ensure weka mount is active
        run: |
          sudo systemctl restart mnt-MLPerf.mount
          sudo /etc/rc.local
          ls -al /mnt/MLPerf/bit_error_tests
      - name: Set up dynamic env vars for build
        run: |
          echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
          echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV
      # Fetch and unpack the build produced by the build-artifact job.
      - uses: actions/download-artifact@v4
        with:
          name: TTMetal_build_${{ matrix.test-group.arch }}
      - name: Extract files
        run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
      - uses: ./.github/actions/install-python-deps
      - name: Run model perf regression tests
        timeout-minutes: ${{ matrix.test-group.timeout }}
        run: |
          source ${{ github.workspace }}/python_env/bin/activate
          cd $TT_METAL_HOME
          export PYTHONPATH=$TT_METAL_HOME
          source ${{ github.workspace }}/tests/scripts/t3000/run_t3000_model_perf_tests.sh
          ${{ matrix.test-group.cmd }}
          env python models/perf/merge_perf_results.py
      # Runs even if the test step failed (but not if cancelled), so a partial
      # report can still be located and uploaded.
      - name: Check perf report exists
        id: check-perf-report
        if: ${{ !cancelled() }}
        run: |
          ls -hal
          export PERF_REPORT_FILENAME="Models_Perf_$(date +%Y_%m_%d).csv"
          ls -hal $PERF_REPORT_FILENAME
          echo "perf_report_filename=$PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT"
      - name: Upload perf report
        if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }}
        uses: actions/upload-artifact@v4
        with:
          name: perf-report-csv-${{ matrix.test-group.model-type }}-${{ matrix.test-group.arch }}-${{ matrix.test-group.model }}-bare-metal
          path: "${{ steps.check-perf-report.outputs.perf_report_filename }}"
      # Notify the matrix entry's owner_id on Slack when the job fails.
      - uses: ./.github/actions/slack-report
        if: ${{ failure() }}
        with:
          slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }}
          owner: ${{ matrix.test-group.owner_id }}
      # Always restore the CPU governor, even after failure or cancellation.
      - name: Disable performance mode
        if: always()
        run: |
          sudo cpupower frequency-set -g ondemand