Skip to content

CI - Model Regression on schedule #23

CI - Model Regression on schedule

CI - Model Regression on schedule #23

# The docs: https://www.notion.so/rasa/The-CI-for-model-regression-tests-92af7185e08e4fb2a0c764770a8e9095
name: CI - Model Regression on schedule
on:
schedule:
# Run once a week
- cron: "1 23 * * */7"
env:
GKE_ZONE: us-central1
GCLOUD_VERSION: "318.0.0"
TF_FORCE_GPU_ALLOW_GROWTH: true
GITHUB_ISSUE_LABELS: '["type:bug :bug:", "tool:model-regression-tests"]'
PERFORMANCE_DROP_THRESHOLD: -0.05
NVML_INTERVAL_IN_SEC: 1
jobs:
read_test_configuration:
name: Reads tests configuration
runs-on: ubuntu-22.04
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
matrix_length: ${{ steps.set-matrix.outputs.matrix_length }}
steps:
- name: Checkout main
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
- name: Checkout dataset
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
with:
repository: ${{ secrets.DATASET_REPOSITORY }}
token: ${{ secrets.ML_TEST_SA_PAT }}
path: "dataset"
ref: "main"
- name: Download gomplate
run: |-
sudo curl -o /usr/local/bin/gomplate -sSL https://github.com/hairyhenderson/gomplate/releases/download/v3.9.0/gomplate_linux-amd64
sudo chmod +x /usr/local/bin/gomplate
- name: Check if a configuration file exists
run: test -f .github/configs/mr-test-schedule.json
- name: Set matrix values
id: set-matrix
shell: bash
run: |-
matrix=$(gomplate -d mapping=./dataset/dataset_config_mapping.json -d github=.github/configs/mr-test-schedule.json -f .github/templates/model_regression_test_config_to_json.tmpl)
matrix_length=$(echo $matrix | jq '.[] | length')
echo "matrix_length=$matrix_length" >> $GITHUB_OUTPUT
echo "matrix=$matrix" >> $GITHUB_OUTPUT
deploy_runner_gpu:
name: Deploy Github Runner - GPU
needs: read_test_configuration
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
- name: Setup Python
id: python
uses: actions/setup-python@57ded4d7d5e986d7296eab16560982c6dd7c923b
with:
python-version: '3.8'
- name: Export CLOUDSDK_PYTHON env variable
run: |
echo "CLOUDSDK_PYTHON=${{ steps.python.outputs.python-path }}" >> $GITHUB_OUTPUT
export CLOUDSDK_PYTHON=${{ steps.python.outputs.python-path }}
- name: Download gomplate
run: |-
curl -o gomplate -sSL https://github.com/hairyhenderson/gomplate/releases/download/v3.9.0/gomplate_linux-amd64
chmod 755 gomplate
- name: Get TensorFlow version
run: |-
# Read TF version from poetry.lock file
pip install toml
TF_VERSION=$(scripts/read_tensorflow_version.sh)
echo "TensorFlow version: $TF_VERSION"
echo TF_VERSION=$TF_VERSION >> $GITHUB_ENV
# Use compatible CUDA/cuDNN with the given TF version
- name: Prepare GitHub runner image tag
run: |-
GH_RUNNER_IMAGE_TAG=$(jq -r 'if (.config | any(.TF == "${{ env.TF_VERSION }}" )) then (.config[] | select(.TF == "${{ env.TF_VERSION }}") | .IMAGE_TAG) else .default_image_tag end' .github/configs/tf-cuda.json)
echo "GitHub runner image tag for TensorFlow ${{ env.TF_VERSION }} is ${GH_RUNNER_IMAGE_TAG}"
echo GH_RUNNER_IMAGE_TAG=$GH_RUNNER_IMAGE_TAG >> $GITHUB_ENV
num_max_replicas=3
matrix_length=${{ needs.read_test_configuration.outputs.matrix_length }}
if [[ $matrix_length -gt $num_max_replicas ]]; then
NUM_REPLICAS=$num_max_replicas
else
NUM_REPLICAS=$matrix_length
fi
echo NUM_REPLICAS=$NUM_REPLICAS >> $GITHUB_ENV
- name: Send warning if the current TF version does not have CUDA image tags configured
if: env.GH_RUNNER_IMAGE_TAG == 'latest'
env:
TF_CUDA_FILE: ./github/config/tf-cuda.json
run: |-
echo "::warning file=${TF_CUDA_FILE},line=3,col=1,endColumn=3::Missing cuda config for tf ${{ env.TF_VERSION }}. If you are not sure how to config CUDA, please reach out to infrastructure."
- name: Notify slack on tf-cuda config updates
if: env.GH_RUNNER_IMAGE_TAG == 'latest'
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
uses: voxmedia/github-action-slack-notify-build@3665186a8c1a022b28a1dbe0954e73aa9081ea9e
with:
channel_id: ${{ secrets.SLACK_ALERTS_CHANNEL_ID }}
status: WARNING
color: warning
- name: Render deployment template
run: |-
export GH_RUNNER_IMAGE_TAG=${{ env.GH_RUNNER_IMAGE_TAG }}
export GH_RUNNER_IMAGE=${{ secrets.GH_RUNNER_IMAGE }}
./gomplate -f .github/runner/github-runner-deployment.yaml.tmpl -o runner_deployment.yaml
# Setup gcloud auth
- uses: google-github-actions/auth@e8df18b60c5dd38ba618c121b779307266153fbf
with:
service_account: ${{ secrets.GKE_RASA_CI_GPU_SA_NAME_RASA_CI_CD }}
credentials_json: ${{ secrets.GKE_SA_RASA_CI_CD_GPU_RASA_CI_CD }}
# Get the GKE credentials for the cluster
- name: Get GKE Cluster Credentials
uses: google-github-actions/get-gke-credentials@894c221960ab1bc16a69902f29f090638cca753f
with:
cluster_name: ${{ secrets.GKE_GPU_CLUSTER_RASA_CI_CD }}
location: ${{ env.GKE_ZONE }}
project_id: ${{ secrets.GKE_SA_RASA_CI_GPU_PROJECT_RASA_CI_CD }}
- name: Deploy Github Runner
run: |-
kubectl apply -f runner_deployment.yaml
kubectl -n github-runner rollout status --timeout=15m deployment/github-runner-$GITHUB_RUN_ID
- name: Notify slack on failure
if: failure()
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
uses: voxmedia/github-action-slack-notify-build@3665186a8c1a022b28a1dbe0954e73aa9081ea9e
with:
channel_id: ${{ secrets.SLACK_ALERTS_CHANNEL_ID }}
status: FAILED
color: danger
model_regression_test_gpu:
name: Model Regression Tests - GPU
continue-on-error: true
needs:
- deploy_runner_gpu
- read_test_configuration
env:
# Determine where CUDA and Nvidia libraries are located. TensorFlow looks for libraries in the given paths
LD_LIBRARY_PATH: "/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64"
ACCELERATOR_TYPE: "GPU"
GITHUB_ISSUE_TITLE: "Scheduled Model Regression Test Failed"
runs-on: [self-hosted, gpu, "${{ github.run_id }}"]
strategy:
# max-parallel: By default, GitHub will maximize the number of jobs run in parallel depending on the available runners on GitHub-hosted virtual machines.
matrix: ${{fromJson(needs.read_test_configuration.outputs.matrix)}}
steps:
- name: Checkout
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
- name: Checkout dataset
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
with:
repository: ${{ secrets.DATASET_REPOSITORY }}
token: ${{ secrets.ML_TEST_SA_PAT }}
path: "dataset"
ref: "main"
- name: Set env variables
id: set_dataset_config_vars
env:
DATASET_NAME: "${{ matrix.dataset }}"
CONFIG_NAME: "${{ matrix.config }}"
run: |-
# determine extra environment variables
# - CONFIG
# - DATASET
# - IS_EXTERNAL
# - EXTERNAL_DATASET_REPOSITORY_BRANCH
# - TRAIN_DIR
# - TEST_DIR
# - DOMAIN_FILE
source <(gomplate -d mapping=./dataset/dataset_config_mapping.json -f .github/templates/configuration_variables.tmpl)
# Not all configurations are available for all datasets.
# The job will fail and the workflow continues, if the configuration file doesn't exist
# for a given dataset
echo "is_dataset_exists=true" >> $GITHUB_OUTPUT
echo "is_config_exists=true" >> $GITHUB_OUTPUT
echo "is_external=${IS_EXTERNAL}" >> $GITHUB_OUTPUT
if [[ "${IS_EXTERNAL}" == "true" ]]; then
echo "DATASET_DIR=dataset_external" >> $GITHUB_ENV
else
echo "DATASET_DIR=dataset" >> $GITHUB_ENV
test -d dataset/$DATASET || (echo "::warning::The ${{ matrix.dataset }} dataset doesn't exist. Skipping the job." \
&& echo "is_config_exists=false" >> $GITHUB_OUTPUT && exit 0)
fi
# Skip job if dataset is Hermit and config is BERT + DIET(seq) + ResponseSelector(t2t) or Sparse + BERT + DIET(seq) + ResponseSelector(t2t)
if [[ "${{ matrix.dataset }}" == "Hermit" && "${{ matrix.config }}" =~ "BERT + DIET(seq) + ResponseSelector(t2t)" ]]; then
echo "::warning::This ${{ matrix.dataset }} dataset / ${{ matrix.config }} config is currently being skipped due to OOM associated with the upgrade to TF 2.6." \
&& echo "is_config_exists=false" >> $GITHUB_OUTPUT && exit 0
fi
# Skip job if a given type is not available for a given dataset
if [[ -z "${DOMAIN_FILE}" && "${{ matrix.type }}" == "core" ]]; then
echo "::warning::The ${{ matrix.dataset }} dataset doesn't include core type. Skipping the job." \
&& echo "is_config_exists=false" >> $GITHUB_OUTPUT && exit 0
fi
test -f dataset/configs/$CONFIG || (echo "::warning::The ${{ matrix.config }} configuration file doesn't exist. Skipping the job." \
&& echo "is_dataset_exists=false" >> $GITHUB_OUTPUT && exit 0)
echo "DATASET=${DATASET}" >> $GITHUB_ENV
echo "CONFIG=${CONFIG}" >> $GITHUB_ENV
echo "DOMAIN_FILE=${DOMAIN_FILE}" >> $GITHUB_ENV
echo "EXTERNAL_DATASET_REPOSITORY_BRANCH=${EXTERNAL_DATASET_REPOSITORY_BRANCH}" >> $GITHUB_ENV
echo "IS_EXTERNAL=${IS_EXTERNAL}" >> $GITHUB_ENV
if [[ -z "${TRAIN_DIR}" ]]; then
echo "TRAIN_DIR=train" >> $GITHUB_ENV
else
echo "TRAIN_DIR=${TRAIN_DIR}" >> $GITHUB_ENV
fi
if [[ -z "${TEST_DIR}" ]]; then
echo "TEST_DIR=test" >> $GITHUB_ENV
else
echo "TEST_DIR=${TEST_DIR}" >> $GITHUB_ENV
fi
HOST_NAME=`hostname`
echo "HOST_NAME=${HOST_NAME}" >> $GITHUB_ENV
- name: Checkout dataset - external
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
if: steps.set_dataset_config_vars.outputs.is_external == 'true'
with:
repository: ${{ env.DATASET }}
token: ${{ secrets.ML_TEST_SA_PAT }}
path: "dataset_external"
ref: ${{ env.EXTERNAL_DATASET_REPOSITORY_BRANCH }}
- name: Set dataset commit
id: set-dataset-commit
working-directory: ${{ env.DATASET_DIR }}
run: |
DATASET_COMMIT=$(git rev-parse HEAD)
echo $DATASET_COMMIT
echo "dataset_commit=$DATASET_COMMIT" >> $GITHUB_OUTPUT
- name: Start Datadog Agent
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true'
env:
DATASET_NAME: "${{ matrix.dataset }}"
CONFIG: "${{ matrix.config }}"
DATASET_COMMIT: "${{ steps.set-dataset-commit.outputs.dataset_commit }}"
BRANCH: ${{ github.ref }}
GITHUB_SHA: "${{ github.sha }}"
TYPE: "${{ matrix.type }}"
DATASET_REPOSITORY_BRANCH: "main"
INDEX_REPETITION: "${{ matrix.index_repetition }}"
run: |
.github/scripts/start_dd_agent.sh "${{ secrets.DD_API_KEY }}" "${{ env.ACCELERATOR_TYPE }}" ${{ env.NVML_INTERVAL_IN_SEC }}
- name: Set up Python 3.10 🐍
uses: actions/setup-python@57ded4d7d5e986d7296eab16560982c6dd7c923b
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true'
with:
python-version: '3.10'
- name: Read Poetry Version 🔢
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true'
run: |
echo "POETRY_VERSION=$(scripts/poetry-version.sh)" >> $GITHUB_ENV
shell: bash
- name: Install poetry 🦄
uses: Gr1N/setup-poetry@15821dc8a61bc630db542ae4baf6a7c19a994844 # v8
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true'
with:
poetry-version: ${{ env.POETRY_VERSION }}
- name: Load Poetry Cached Libraries ⬇
uses: actions/cache@88522ab9f39a2ea568f7027eddc7d8d8bc9d59c8
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true'
with:
path: ~/.cache/pypoetry/virtualenvs
key: ${{ runner.os }}-poetry-${{ env.POETRY_VERSION }}-3.9-${{ hashFiles('**/poetry.lock') }}-${{ secrets.POETRY_CACHE_VERSION }}
- name: Install Dependencies 📦
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true'
run: |
make install-full
poetry run python -m spacy download de_core_news_md
- name: Install datadog-api-client
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true'
run: poetry run pip install -U datadog-api-client
- name: Validate that GPUs are working
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true'
run: |-
poetry run python .github/scripts/validate_gpus.py
- name: Download pretrained models 💪
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true'
run: |-
poetry run python .github/scripts/download_pretrained.py --config dataset/configs/${CONFIG}
- name: Run test
id: run_test
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true'
env:
TFHUB_CACHE_DIR: ~/.tfhub_cache/
OMP_NUM_THREADS: 1
run: |-
poetry run rasa --version
export NOW_TRAIN=$(gomplate -i '{{ (time.Now).Format time.RFC3339}}');
cd ${{ github.workspace }}
if [[ "${{ steps.set_dataset_config_vars.outputs.is_external }}" == "true" ]]; then
export DATASET=.
fi
if [[ "${{ matrix.type }}" == "nlu" ]]; then
poetry run rasa train nlu --quiet -u "${DATASET_DIR}/${DATASET}/${TRAIN_DIR}" -c "dataset/configs/${CONFIG}" --out "${DATASET_DIR}/models/${DATASET}/${CONFIG}"
echo "train_run_time=$(gomplate -i '{{ $t := time.Parse time.RFC3339 (getenv "NOW_TRAIN") }}{{ (time.Since $t).Round (time.Second 1) }}')" >> $GITHUB_OUTPUT
export NOW_TEST=$(gomplate -i '{{ (time.Now).Format time.RFC3339}}');
poetry run rasa test nlu --quiet -u "${DATASET_DIR}/$DATASET/${TEST_DIR}" -m "${DATASET_DIR}/models/$DATASET/$CONFIG" --out "${{ github.workspace }}/results/$DATASET/$CONFIG"
echo "test_run_time=$(gomplate -i '{{ $t := time.Parse time.RFC3339 (getenv "NOW_TEST") }}{{ (time.Since $t).Round (time.Second 1) }}')" >> $GITHUB_OUTPUT
echo "total_run_time=$(gomplate -i '{{ $t := time.Parse time.RFC3339 (getenv "NOW_TRAIN") }}{{ (time.Since $t).Round (time.Second 1) }}')" >> $GITHUB_OUTPUT
elif [[ "${{ matrix.type }}" == "core" ]]; then
poetry run rasa train core --quiet -s ${DATASET_DIR}/$DATASET/$TRAIN_DIR -c dataset/configs/$CONFIG -d ${DATASET_DIR}/${DATASET}/${DOMAIN_FILE}
echo "train_run_time=$(gomplate -i '{{ $t := time.Parse time.RFC3339 (getenv "NOW_TRAIN") }}{{ (time.Since $t).Round (time.Second 1) }}')" >> $GITHUB_OUTPUT
export NOW_TEST=$(gomplate -i '{{ (time.Now).Format time.RFC3339}}');
poetry run rasa test core -s "${DATASET_DIR}/${DATASET}/${TEST_DIR}" --out "${{ github.workspace }}/results/${{ matrix.dataset }}/${{ matrix.config }}"
echo "test_run_time=$(gomplate -i '{{ $t := time.Parse time.RFC3339 (getenv "NOW_TEST") }}{{ (time.Since $t).Round (time.Second 1) }}')" >> $GITHUB_OUTPUT
echo "total_run_time=$(gomplate -i '{{ $t := time.Parse time.RFC3339 (getenv "NOW_TRAIN") }}{{ (time.Since $t).Round (time.Second 1) }}')" >> $GITHUB_OUTPUT
fi
- name: Generate a JSON file with a report / Publish results to Datadog
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true'
env:
SUMMARY_FILE: "./report.json"
DATASET_NAME: ${{ matrix.dataset }}
RESULT_DIR: "${{ github.workspace }}/results"
CONFIG: ${{ matrix.config }}
TEST_RUN_TIME: ${{ steps.run_test.outputs.test_run_time }}
TRAIN_RUN_TIME: ${{ steps.run_test.outputs.train_run_time }}
TOTAL_RUN_TIME: ${{ steps.run_test.outputs.total_run_time }}
DATASET_REPOSITORY_BRANCH: "main"
TYPE: ${{ matrix.type }}
INDEX_REPETITION: ${{ matrix.index_repetition }}
DATASET_COMMIT: ${{ steps.set-dataset-commit.outputs.dataset_commit }}
BRANCH: ${{ github.ref }}
PR_ID: "${{ github.event.number }}"
PR_URL: ""
DD_APP_KEY: ${{ secrets.DD_APP_KEY_PERF_TEST }}
DD_API_KEY: ${{ secrets.DD_API_KEY }}
DD_SITE: datadoghq.eu
run: |-
poetry run pip install analytics-python
poetry run python .github/scripts/mr_publish_results.py
cat $SUMMARY_FILE
- name: Upload an artifact with the report
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true'
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
with:
name: report-${{ matrix.dataset }}-${{ matrix.config }}-${{ matrix.index_repetition }}
path: report.json
# Prepare diagnostic data for the configs which evaluate dialog policy performance
- name: Prepare diagnostic data
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' && matrix.type == 'core'
run: |
# Create dummy files to preserve directory structure when uploading artifacts
# See: https://github.com/actions/upload-artifact/issues/174
touch "results/${{ matrix.dataset }}/.keep"
touch "results/${{ matrix.dataset }}/${{ matrix.config }}/.keep"
- name: Upload an artifact with diagnostic data
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true' && matrix.type == 'core'
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
with:
name: diagnostic_data
path: |
results/**/.keep
results/${{ matrix.dataset }}/${{ matrix.config }}/failed_test_stories.yml
results/${{ matrix.dataset }}/${{ matrix.config }}/story_confusion_matrix.png
- name: Stop Datadog Agent
if: steps.set_dataset_config_vars.outputs.is_dataset_exists == 'true' && steps.set_dataset_config_vars.outputs.is_config_exists == 'true'
run: |
sudo service datadog-agent stop
- name: Check duplicate issue
if: failure() && github.event_name == 'schedule'
uses: actions/github-script@d7906e4ad0b1822421a7e6a35d5ca353c962f410
id: issue-exists
with:
result-encoding: string
github-token: ${{ github.token }}
script: |
// Get all open issues
const opts = await github.issues.listForRepo.endpoint.merge({
owner: context.repo.owner,
repo: context.repo.repo,
state: 'open',
labels: ${{ env.GITHUB_ISSUE_LABELS }}
});
const issues = await github.paginate(opts)
// Check if issue exist by comparing title and body
for (const issue of issues) {
if (issue.title.includes('${{ env.GITHUB_ISSUE_TITLE }}') &&
issue.body.includes('${{ matrix.dataset }}') &&
issue.body.includes('${{ matrix.config }}')) {
console.log(`Found an exist issue \#${issue.number}. Skip the following steps.`);
return 'true'
}
}
return 'false'
- name: Create GitHub Issue 📬
id: create-issue
if: failure() && steps.issue-exists.outputs.result == 'false' && github.event_name == 'schedule'
uses: actions/github-script@d7906e4ad0b1822421a7e6a35d5ca353c962f410
with:
# do not use GITHUB_TOKEN here because it wouldn't trigger subsequent workflows
github-token: ${{ secrets.RASABOT_GITHUB_TOKEN }}
script: |
var issue = await github.issues.create({
owner: context.repo.owner,
repo: context.repo.repo,
title: '${{ env.GITHUB_ISSUE_TITLE }}',
labels: ${{ env.GITHUB_ISSUE_LABELS }},
body: '*This PR is automatically created by the Scheduled Model Regression Test workflow. Checkout the Github Action Run [here](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}).* <br> --- <br> **Description of Problem:** <br> Scheduled Model Regression Test failed. <br> **Configuration**: `${{ matrix.config }}` <br> **Dataset**: `${{ matrix.dataset}}`'
})
return issue.data.number
- name: Notify Slack of Failure 😱
if: failure() && steps.issue-exists.outputs.result == 'false' && github.event_name == 'schedule'
uses: 8398a7/action-slack@fbd6aa58ba854a740e11a35d0df80cb5d12101d8 # v3
with:
status: custom
fields: workflow,job,commit,repo,ref,author,took
custom_payload: |
{
attachments: [{
fallback: 'fallback',
color: '${{ job.status }}' === 'success' ? 'good' : '${{ job.status }}' === 'failure' ? 'danger' : 'warning',
title: `${process.env.AS_WORKFLOW}`,
text: 'Scheduled model regression test failed :no_entry:️',
fields: [{
title: 'Configuration',
value: '${{ matrix.config }}',
short: false
},
{
title: 'Dataset',
value: '${{ matrix.dataset }}',
short: false
},
{
title: 'GitHub Issue',
value: `https://github.com/${{ github.repository }}/issues/${{ steps.create-issue.outputs.result }}`,
short: false
},
{
title: 'GitHub Action',
value: `https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}`,
short: false
}],
actions: [{
}]
}]
}
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_CI_MODEL_REGRESSION_TEST }}
combine_reports:
name: Combine reports
runs-on: ubuntu-22.04
needs:
- model_regression_test_gpu
if: always() && needs.model_regression_test_gpu.result == 'success'
steps:
- name: Checkout git repository 🕝
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
- name: Set up Python 3.10 🐍
uses: actions/setup-python@57ded4d7d5e986d7296eab16560982c6dd7c923b
with:
python-version: '3.10'
- name: Get reports
uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a
with:
path: reports/
- name: Display structure of downloaded files
continue-on-error: true
run: ls -R
working-directory: reports/
- name: Merge all reports
env:
SUMMARY_FILE: "./report.json"
REPORTS_DIR: "reports/"
run: |
python .github/scripts/mr_generate_summary.py
cat $SUMMARY_FILE
- name: Upload an artifact with the overall report
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
with:
name: report.json
path: ./report.json
analyse_performance:
name: Analyse Performance
runs-on: ubuntu-22.04
if: always() && github.event_name == 'schedule'
needs:
- model_regression_test_gpu
- combine_reports
env:
GITHUB_ISSUE_TITLE: "Scheduled Model Regression Test Performance Drops"
steps:
- name: Checkout
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
- name: Download report from last on-schedule regression test
run: |
# Get ID of last on-schedule workflow
SCHEDULE_ID=$(curl -X GET -s -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' -H "Accept: application/vnd.github.v3+json" \
"https://api.github.com/repos/${{ github.repository }}/actions/workflows" \
| jq -r '.workflows[] | select(.name == "${{ github.workflow }}") | select(.path | test("schedule")) | .id')
ARTIFACT_URL=$(curl -s -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' -H "Accept: application/vnd.github.v3+json" \
"https://api.github.com/repos/${{ github.repository }}/actions/workflows/${SCHEDULE_ID}/runs?event=schedule&status=completed&branch=main&per_page=1" | jq -r .workflow_runs[0].artifacts_url)
DOWNLOAD_URL=$(curl -s -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' -H "Accept: application/vnd.github.v3+json" "${ARTIFACT_URL}" \
| jq -r '.artifacts[] | select(.name == "report.json") | .archive_download_url')
# Download the artifact
curl -H 'Authorization: token ${{ secrets.GITHUB_TOKEN }}' -LJO -H "Accept: application/vnd.github.v3+json" $DOWNLOAD_URL
# Unzip and change name
unzip report.json.zip && mv report.json report_main.json
- name: Download the report
uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a
with:
name: report.json
- name: Download gomplate
run: |-
sudo curl -o /usr/local/bin/gomplate -sSL https://github.com/hairyhenderson/gomplate/releases/download/v3.9.0/gomplate_linux-amd64
sudo chmod +x /usr/local/bin/gomplate
- name: Analyse Performance 🔍
id: performance
run: |
OUTPUT="$(gomplate -d data=report.json -d results_main=report_main.json -f .github/templates/model_regression_test_results.tmpl)"
OUTPUT="${OUTPUT//$'\n'/'%0A'}"
OUTPUT="${OUTPUT//$'\r'/'%0D'}"
OUTPUT="$(echo $OUTPUT | sed 's|`|\\`|g')"
echo "report_description=${OUTPUT}" >> $GITHUB_OUTPUT
IS_DROPPED=false
# Loop through all negative values within parentheses
# Set IS_DROPPED to true if there is any value lower
# than the threshold
for x in $(grep -o '\(-[0-9.]\+\)' <<< $OUTPUT); do
if (( $(bc -l <<< "${{ env.PERFORMANCE_DROP_THRESHOLD }} > $x") )); then
IS_DROPPED=true
echo "The decrease of some test performance is > ${{ env.PERFORMANCE_DROP_THRESHOLD }}. Executing the following steps..."
break
fi
done
echo "is_dropped=$IS_DROPPED" >> $GITHUB_OUTPUT
- name: Check duplicate issue
if: steps.performance.outputs.is_dropped == 'true'
uses: actions/github-script@d7906e4ad0b1822421a7e6a35d5ca353c962f410
id: issue-exists
with:
result-encoding: string
github-token: ${{ github.token }}
script: |
// Get all open issues based on labels
const opts = await github.issues.listForRepo.endpoint.merge({
owner: context.repo.owner,
repo: context.repo.repo,
state: 'open',
labels: ${{ env.GITHUB_ISSUE_LABELS }}
});
const issues = await github.paginate(opts)
// Check if issue exist by comparing title
for (const issue of issues) {
if (issue.title.includes('${{ env.GITHUB_ISSUE_TITLE }}') ) {
console.log(`Found an exist issue \#${issue.number}. Skip the following steps.`);
return 'true'
}
}
return 'false'
- name: Create GitHub Issue 📬
id: create-issue
if: steps.performance.outputs.is_dropped == 'true' && steps.issue-exists.outputs.result == 'false'
uses: actions/github-script@d7906e4ad0b1822421a7e6a35d5ca353c962f410
with:
# do not use GITHUB_TOKEN here because it wouldn't trigger subsequent workflows
github-token: ${{ secrets.RASABOT_GITHUB_TOKEN }}
script: |
// Prepare issue body
let issue_body = '*This PR is automatically created by the Scheduled Model Regression Test workflow. Checkout the Github Action Run [here](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}).* <br> --- <br> **Description of Problem:** <br> Some test performance scores **decreased**. Please look at the following table for more details. <br>'
issue_body += `${{ steps.performance.outputs.report_description }}`
// Open issue
var issue = await github.issues.create({
owner: context.repo.owner,
repo: context.repo.repo,
title: '${{ env.GITHUB_ISSUE_TITLE }}',
labels: ${{ env.GITHUB_ISSUE_LABELS }},
body: issue_body
})
return issue.data.number
- name: Notify Slack when Performance Drops 💬
if: steps.performance.outputs.is_dropped == 'true' && steps.issue-exists.outputs.result == 'false'
uses: 8398a7/action-slack@fbd6aa58ba854a740e11a35d0df80cb5d12101d8 #v3
with:
status: custom
fields: workflow,job,commit,repo,ref,author,took
custom_payload: |
{
attachments: [{
fallback: 'fallback',
color: 'danger',
title: `${process.env.AS_WORKFLOW}`,
text: 'Scheduled model regression test performance drops :chart_with_downwards_trend:',
fields: [{
title: 'GitHub Issue',
value: `https://github.com/${{ github.repository }}/issues/${{ steps.create-issue.outputs.result }}`,
short: false
},
{
title: 'GitHub Action',
value: `https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}`,
short: false
}],
actions: [{
}]
}]
}
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_CI_MODEL_REGRESSION_TEST }}
remove_runner_gpu:
name: Delete Github Runner - GPU
if: always()
needs:
- deploy_runner_gpu
- model_regression_test_gpu
runs-on: ubuntu-22.04
steps:
- name: Setup Python
id: python
uses: actions/setup-python@57ded4d7d5e986d7296eab16560982c6dd7c923b
with:
python-version: '3.8'
- name: Export CLOUDSDK_PYTHON env variable
run: |
echo "CLOUDSDK_PYTHON=${{ steps.python.outputs.python-path }}" >> $GITHUB_OUTPUT
export CLOUDSDK_PYTHON=${{ steps.python.outputs.python-path }}
# Setup gcloud auth
- uses: google-github-actions/auth@e8df18b60c5dd38ba618c121b779307266153fbf
with:
service_account: ${{ secrets.GKE_RASA_CI_GPU_SA_NAME_RASA_CI_CD }}
credentials_json: ${{ secrets.GKE_SA_RASA_CI_CD_GPU_RASA_CI_CD }}
# Get the GKE credentials for the cluster
- name: Get GKE Cluster Credentials
uses: google-github-actions/get-gke-credentials@894c221960ab1bc16a69902f29f090638cca753f
with:
cluster_name: ${{ secrets.GKE_GPU_CLUSTER_RASA_CI_CD }}
location: ${{ env.GKE_ZONE }}
project_id: ${{ secrets.GKE_SA_RASA_CI_GPU_PROJECT_RASA_CI_CD }}
- name: Remove Github Runner
run: kubectl -n github-runner delete deployments github-runner-${GITHUB_RUN_ID} --grace-period=30