From e803b9c3ff31b46112fba762e70ad74173844408 Mon Sep 17 00:00:00 2001
From: Aswinmcw
Date: Thu, 14 Nov 2024 05:41:41 +0000
Subject: [PATCH] Combine job CI check

---
Review notes (text between the "---" line above and the first "diff --git"
line below is ignored by `git am` and does not become part of the commit):

* Dropped a stray added `fi` that followed the "No CCL perf report found"
  check. It closed the outer `if [ "${{ matrix.test-group.tracy }}" ... ]`
  early, so the `else` after it had no open `if` and the step script was a
  shell syntax error. The second hunk is now +80,24 and the diffstat reads
  15 insertions.
* The post-image blob id on the `index` line is carried over unchanged from
  the original patch; regenerate it with `git format-patch` if it is needed
  for a 3-way apply.
* Leading whitespace was rebuilt from a copy of this patch whose line breaks
  had been collapsed; confirm it against the target file before applying.
* NOTE(review): CCL reports are now exported as
  `perf_report_filename_all_gather` / `perf_report_filename_reduce_scatter`
  instead of `perf_report_filename`, and the combined CCL matrix entry no
  longer sets `model`. Consumers of either are not visible in this patch;
  confirm they were updated.
* NOTE(review): the check passes when only one of the two CCL reports
  exists; confirm that is intended now that one job runs both tests.

 .../t3000-model-perf-tests-impl.yaml          | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/t3000-model-perf-tests-impl.yaml b/.github/workflows/t3000-model-perf-tests-impl.yaml
index dbc8b6792312..01c9349fa8ac 100644
--- a/.github/workflows/t3000-model-perf-tests-impl.yaml
+++ b/.github/workflows/t3000-model-perf-tests-impl.yaml
@@ -22,8 +22,7 @@ jobs:
           { name: "t3k LLM llama3 model perf tests", model: "llama3", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama3_tests, timeout: 60, owner_id: U03PUAKE719}, # Miguel Tairum
           { name: "t3k LLM falcon40b model perf tests", model: "falcon40b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 75, owner_id: U053W15B6JF}, # Djordje Ivanovic
           { name: "t3k CNN resnet50 model perf tests", model: "resnet50", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_resnet50_tests, timeout: 75, owner_id: U013121KDH9}, # Austin Ho
-          { name: "t3k CCL all_gather perf tests", model: "all_gather", arch: wormhole_b0, cmd: run_t3000_ccl_all_gather_perf_tests, timeout: 75, tracy: true, owner_id: ULMEPM2MA}, # Sean Nijjar
-          { name: "t3k CCL reduce_scatter perf tests", model: "reduce_scatter", arch: wormhole_b0, cmd: run_t3000_ccl_reduce_scatter_perf_tests, timeout: 75, tracy: true, owner_id: ULMEPM2MA}, # Sean Nijjar
+          { name: "t3k CCL perf tests", arch: wormhole_b0, cmd: run_t3000_ccl_all_gather_perf_tests && run_t3000_ccl_reduce_scatter_perf_tests, timeout: 75, tracy: true, owner_id: ULMEPM2MA}, # Sean Nijjar
           #{ name: "t3k CNN model perf tests ", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_cnn_tests, timeout: 120, owner_id: }, #No tests are being run? 
         ]
     name: ${{ matrix.test-group.name }}
@@ -81,15 +80,24 @@ jobs:
         run: |
           TODAY=$(date +%Y_%m_%d)
           PERF_REPORT_FILENAME_MODELS="Models_Perf_${TODAY}.csv"
-          PERF_REPORT_FILENAME_CCL="CCL_${{ matrix.test-group.model }}_Perf_${TODAY}.csv"
+          PERF_REPORT_FILENAME_CCL_ALL_GATHER="CCL_all_gather_Perf_${TODAY}.csv"
+          PERF_REPORT_FILENAME_CCL_REDUCE_SCATTER="CCL_reduce_scatter_Perf_${TODAY}.csv"
           if [ "${{ matrix.test-group.tracy }}" == "true" ]; then
-            if [ -f "$PERF_REPORT_FILENAME_CCL" ]; then
-              echo "Found CCL Perf report: $PERF_REPORT_FILENAME_CCL"
-              echo "perf_report_filename=$PERF_REPORT_FILENAME_CCL" >> "$GITHUB_OUTPUT"
-            else
+            found_reports=false
+            if [ -f "$PERF_REPORT_FILENAME_CCL_ALL_GATHER" ]; then
+              echo "Found CCL AllGather Perf report: $PERF_REPORT_FILENAME_CCL_ALL_GATHER"
+              echo "perf_report_filename_all_gather=$PERF_REPORT_FILENAME_CCL_ALL_GATHER" >> "$GITHUB_OUTPUT"
+              found_reports=true
+            fi
+            if [ -f "$PERF_REPORT_FILENAME_CCL_REDUCE_SCATTER" ]; then
+              echo "Found CCL ReduceScatter Perf report: $PERF_REPORT_FILENAME_CCL_REDUCE_SCATTER"
+              echo "perf_report_filename_reduce_scatter=$PERF_REPORT_FILENAME_CCL_REDUCE_SCATTER" >> "$GITHUB_OUTPUT"
+              found_reports=true
+            fi
+            if [ "$found_reports" = false ]; then
               echo "No CCL perf report found for today."
               exit 1
             fi
           else
             if [ -f "$PERF_REPORT_FILENAME_MODELS" ]; then
               echo "Found Models Perf report: $PERF_REPORT_FILENAME_MODELS"