# .github/workflows/model_performance.yml

# **what?**
# This workflow models the performance characteristics of a point in time in dbt.
# It runs specific dbt commands on committed projects multiple times to create and
# commit information about the distribution to the current branch. For more information
# see the readme in the performance module at /performance/README.md.
#
# **why?**
# When developing new features, we can take quick performance samples and compare
# them against the committed baseline measurements produced by this workflow to detect
# some performance regressions at development time before they reach users.
#
# **when?**
# This is only run once directly after each release (for non-prereleases). If for some
# reason the results of a run are not satisfactory, it can also be triggered manually.

name: Model Performance Characteristics

on:
  # fires once a full (non-prerelease) release has been published
  release:
    types:
      - released
  # allows an on-demand run from the actions tab for a chosen version
  workflow_dispatch:
    inputs:
      release_id:
        type: string
        required: true
        description: 'dbt version to model (must be non-prerelease in Pypi)'

env:
  # location of the compiled runner binary; used as the cache path by the jobs below
  RUNNER_CACHE_PATH: performance/runner/target/release/runner

# workflow-level token permissions, applied to every job in this file
permissions:
  contents: write
  pull-requests: write

jobs:
  set-variables:
    name: Setting Variables
    runs-on: ubuntu-latest
    outputs:
      release_id: ${{ steps.semver.outputs.base-version }}
      release_branch: ${{ steps.variables.outputs.release_branch }}
      cache_key: ${{ steps.variables.outputs.cache_key }}
    steps:

      # the performance runner is always taken from main, whichever dbt version
      # is being modeled.
      - name: Checkout
        uses: actions/checkout@v4
        with:
          ref: main

      - name: Parse version into parts
        id: semver
        uses: dbt-labs/actions/parse-semver@v1
        with:
          # manual runs supply `release_id`; release events fall back to the tag name
          version: ${{ github.event.inputs.release_id || github.event.release.tag_name }}

      # gather every value that later jobs read through `needs.set-variables.outputs`
      - name: Set variables
        id: variables
        run: |
          # build the cache key here so that the following job does not have to
          # checkout main and hash the runner files on its own.
          echo "cache_key=${{ runner.os }}-${{ hashFiles('performance/runner/Cargo.toml')}}-${{ hashFiles('performance/runner/src/*') }}" >> $GITHUB_OUTPUT

          # the release branch name is derived from the major and minor version parts
          inferred_branch="${{steps.semver.outputs.major}}.${{steps.semver.outputs.minor}}.latest"
          echo "release_branch=$inferred_branch" >> $GITHUB_OUTPUT
          echo "release branch is inferred to be ${inferred_branch}"

  latest-runner:
    name: Build or Fetch Runner
    needs:
      - set-variables
    runs-on: ubuntu-latest
    env:
      # rustc flag: treat compiler warnings as errors
      RUSTFLAGS: "-D warnings"
    steps:
      - name: '[DEBUG] print variables'
        run: |
          echo "all variables defined in set-variables"
          echo "cache_key:      ${{ needs.set-variables.outputs.cache_key }}"
          echo "release_id:     ${{ needs.set-variables.outputs.release_id }}"
          echo "release_branch: ${{ needs.set-variables.outputs.release_branch }}"

      # the performance runner is always taken from main, whichever dbt version
      # is being modeled.
      - name: Checkout
        uses: actions/checkout@v4
        with:
          ref: main

      # look for a runner binary cached under the key computed in `set-variables`;
      # every later step in this job is skipped when the lookup hits.
      - id: cache
        uses: actions/cache@v4
        with:
          key: ${{ needs.set-variables.outputs.cache_key }}
          path: ${{ env.RUNNER_CACHE_PATH }}

      - name: Fetch Rust Toolchain
        if: steps.cache.outputs.cache-hit != 'true'
        uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
          profile: minimal
          override: true

      - name: Add fmt
        if: steps.cache.outputs.cache-hit != 'true'
        run: rustup component add rustfmt

      # formatting check only (`--check`)
      - name: Cargo fmt
        if: steps.cache.outputs.cache-hit != 'true'
        uses: actions-rs/cargo@v1
        with:
          command: fmt
          args: --manifest-path performance/runner/Cargo.toml --all -- --check

      - name: Test
        if: steps.cache.outputs.cache-hit != 'true'
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: --manifest-path performance/runner/Cargo.toml

      - name: Build (optimized)
        if: steps.cache.outputs.cache-hit != 'true'
        uses: actions-rs/cargo@v1
        with:
          command: build
          args: --release --manifest-path performance/runner/Cargo.toml
      # no explicit save step is needed: the cache action stores this binary
      # automatically when the job ends

  model:
    # kept as a separate job from `latest-runner` so that a failure while modeling does
    # not prevent a successfully tested and built binary from being cached.
    needs: [set-variables, latest-runner]
    name: Model a release
    runs-on: ubuntu-latest
    steps:

      - name: '[DEBUG] print variables'
        run: |
          echo "all variables defined in set-variables"
          echo "cache_key:      ${{ needs.set-variables.outputs.cache_key }}"
          echo "release_id:     ${{ needs.set-variables.outputs.release_id }}"
          echo "release_branch: ${{ needs.set-variables.outputs.release_branch }}"

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.8"

      # installs exactly the released version that is being modeled
      - name: Install dbt
        run: pip install dbt-postgres==${{ needs.set-variables.outputs.release_id }}

      - name: Install Hyperfine
        run: wget https://github.com/sharkdp/hyperfine/releases/download/v1.11.0/hyperfine_1.11.0_amd64.deb && sudo dpkg -i hyperfine_1.11.0_amd64.deb

      # explicitly checkout main to get the latest project definitions
      - name: Checkout
        uses: actions/checkout@v4
        with:
          ref: main

      # restores the binary that `latest-runner` built or found in the cache.
      # `fail-on-cache-miss` stops the job here with a clear error if the entry is
      # missing, rather than letting the `mv` below fail on a nonexistent file.
      - name: Fetch Runner
        uses: actions/cache@v4
        id: cache
        with:
          path: ${{ env.RUNNER_CACHE_PATH }}
          key: ${{ needs.set-variables.outputs.cache_key }}
          fail-on-cache-miss: true

      - name: Move Runner
        run: mv performance/runner/target/release/runner performance/app

      - name: Change Runner Permissions
        run: chmod +x ./performance/app

      - name: '[DEBUG] ls baseline directory before run'
        run: ls -R performance/baselines/

      # `${{ github.workspace }}` is used to pass the absolute path
      - name: Create directories
        run: |
          mkdir ${{ github.workspace }}/performance/tmp/
          mkdir -p performance/baselines/${{ needs.set-variables.outputs.release_id }}/

      # Run the modeling, taking 20 samples
      - name: Run Measurement
        run: |
          performance/app model -v ${{ needs.set-variables.outputs.release_id }} -b ${{ github.workspace }}/performance/baselines/ -p ${{ github.workspace }}/performance/projects/ -t ${{ github.workspace }}/performance/tmp/ -n 20

      - name: '[DEBUG] ls baseline directory after run'
        run: ls -R performance/baselines/

      # must stay on the same major version as the `download-artifact` step in the
      # `create-pr` job: artifacts uploaded with v3 cannot be downloaded with v4.
      - uses: actions/upload-artifact@v4
        with:
          name: baseline
          path: performance/baselines/${{ needs.set-variables.outputs.release_id }}/

  create-pr:
    name: Open PR for ${{ matrix.base-branch }}

    # runs as its own job after `model` so that the single uploaded baseline can be
    # committed to more than one branch, i.e. main and the release branch.
    needs:
      - set-variables
      - latest-runner
      - model
    runs-on: ubuntu-latest

    strategy:
      matrix:
        # one entry per branch that receives the baseline; `target-branch` is the
        # branch the pull request is opened from.
        include:
          - base-branch: refs/heads/main
            target-branch: performance-bot/main_${{ needs.set-variables.outputs.release_id }}_${{GITHUB.RUN_ID}}
          - base-branch: refs/heads/${{ needs.set-variables.outputs.release_branch }}
            target-branch: performance-bot/release_${{ needs.set-variables.outputs.release_id }}_${{GITHUB.RUN_ID}}

    steps:
      - name: '[DEBUG] print variables'
        run: |
          echo "all variables defined in set-variables"
          echo "cache_key:      ${{ needs.set-variables.outputs.cache_key }}"
          echo "release_id:     ${{ needs.set-variables.outputs.release_id }}"
          echo "release_branch: ${{ needs.set-variables.outputs.release_branch }}"

      # start from the branch the pull request will be opened against
      - name: Checkout
        uses: actions/checkout@v4
        with:
          ref: ${{ matrix.base-branch }}

      # create the PR branch locally, publish it, and track the remote copy
      - name: Create PR branch
        run: |
          git checkout -b ${{ matrix.target-branch }}
          git push origin ${{ matrix.target-branch }}
          git branch --set-upstream-to=origin/${{ matrix.target-branch }} ${{ matrix.target-branch }}

      # fetch the `baseline` artifact into the directory for this release
      - uses: actions/download-artifact@v4
        with:
          name: baseline
          path: performance/baselines/${{ needs.set-variables.outputs.release_id }}

      - name: '[DEBUG] ls baselines after artifact download'
        run: ls -R performance/baselines/

      - name: Commit baseline
        uses: EndBug/add-and-commit@v9
        with:
          author_name: 'Github Build Bot'
          author_email: 'buildbot@fishtownanalytics.com'
          message: 'adding performance baseline for ${{ needs.set-variables.outputs.release_id }}'
          add: 'performance/baselines/*'
          push: 'origin origin/${{ matrix.target-branch }}'

      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v5
        with:
          author: 'Github Build Bot <buildbot@fishtownanalytics.com>'
          branch: '${{ matrix.target-branch }}'
          base: ${{ matrix.base-branch }}
          title: 'Adding performance modeling for ${{needs.set-variables.outputs.release_id}} to ${{ matrix.base-branch }}'
          body: 'Committing perf results for tracking for the ${{needs.set-variables.outputs.release_id}}'
          labels: |
            Skip Changelog
            Performance