Skip to content

Arrow schema and metadata tags are lost too easily #3336

Arrow schema and metadata tags are lost too easily

Arrow schema and metadata tags are lost too easily #3336

Workflow file for this run

# This workflow is triggered on any PR comment, and tries to find a `@rerun-bot` mention in it.
# If the mention is a command, such as `@rerun-bot full-check`, then it runs the command.
#
# Available commands:
#   full-check    Triggers a run of `on_push_main.yml` on the PR.
name: "PR Comment"

# Fires whenever a new comment is created on an issue or pull request.
# The `parse-command` job below narrows this down to PR comments that
# mention `@rerun-bot`.
on:
  issue_comment:
    types: [created]

# Every `run:` step in this workflow is executed with bash.
defaults:
  run:
    shell: bash

# Token scopes granted to `GITHUB_TOKEN` for all jobs in this workflow.
permissions:
  contents: "read"
  # NOTE(review): no step visible in this workflow requests an OIDC token;
  # confirm whether `id-token: write` is still required.
  id-token: "write"
  # Needed to post the "Started a full build" comment on the PR.
  pull-requests: "write"
jobs:
  # Extracts the bot command (if any) from the PR comment and exposes it as
  # the `command` output for downstream jobs.
  parse-command:
    # Only run for comments on pull requests that mention the bot and were
    # written by someone with collaborator, member or owner association.
    if: |
      contains(github.event.comment.html_url, '/pull/') &&
      contains(github.event.comment.body, '@rerun-bot') &&
      contains(
        fromJSON('["COLLABORATOR","MEMBER","OWNER"]'),
        github.event.comment.author_association
      )
    runs-on: ubuntu-latest
    outputs:
      command: ${{ steps.parse.outputs.command }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Parse comment
        id: parse
        env:
          # The comment body is passed through the environment rather than
          # interpolated into the command line, so its content is never
          # interpreted by the shell.
          GITHUB_COMMENT_BODY: "${{ github.event.comment.body }}"
        run: python ./scripts/ci/parse_bot_pr_comment.py

  # Handles `@rerun-bot full-check`: dispatches `on_push_main.yml` on the PR's
  # head branch and replies on the PR with a link to the started run.
  full-check:
    needs: [parse-command]
    if: needs.parse-command.outputs.command == 'full-check'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Dispatch main workflow
        id: dispatch
        env:
          # NOTE: This uses `RERUN_BOT_TOKEN` instead of `GITHUB_TOKEN`,
          #       otherwise the recursive workflow protection prevents us from
          #       starting the `on_push_main` run.
          #       https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow
          GH_TOKEN: ${{ secrets.RERUN_BOT_TOKEN }}
          # Passed via the environment so the script contains no `${{ }}`
          # interpolation.
          PR_NUMBER: ${{ github.event.issue.number }}
        run: |
          # Prints the JSON list of `workflow_dispatch` runs of `$1` on branch
          # `$2` created at or after timestamp `$3` (most recent first).
          # Returns the exit status of `gh`, so callers can detect failures.
          get_latest_workflow_run () {
            local workflow_name=$1
            local ref_name=$2
            local created_after=$3

            # `>=` rather than `>`: timestamps have one-second resolution, so
            # a run created within the same second as `created_after` would
            # otherwise never match.
            # https://cli.github.com/manual/gh_run_list
            # https://docs.github.com/en/search-github/getting-started-with-searching-on-github/understanding-the-search-syntax#query-for-dates
            gh run list \
              --workflow "$workflow_name" \
              --event workflow_dispatch \
              --branch "$ref_name" \
              --created ">=$created_after" \
              --json databaseId
          }

          # Dispatches workflow `$1` on branch `$2`, with the JSON object `$3`
          # supplying the workflow inputs on stdin.
          dispatch_workflow () {
            local workflow_name=$1
            local ref_name=$2
            local inputs=$3

            # https://cli.github.com/manual/gh_workflow_run
            echo "$inputs" | gh workflow run "$workflow_name" --ref "refs/heads/$ref_name" --json
          }

          workflow_name='on_push_main.yml'
          ref_name=$(gh pr view "$PR_NUMBER" --json headRefName | jq -r '.headRefName')
          inputs=$(jq -cn --arg concurrency "pr-$PR_NUMBER-full-check" '{CONCURRENCY: $concurrency}')
          now=$(date --utc --iso-8601=seconds)

          echo "Dispatching workflow $workflow_name on branch $ref_name"
          dispatch_workflow "$workflow_name" "$ref_name" "$inputs"

          # `gh workflow run` does NOT return the ID.
          # In fact, it returns absolutely nothing: https://github.com/cli/cli/issues/4001
          # Instead, we have to wait for the workflow to start, and hope that nobody has
          # started a workflow in parallel with us on the same branch.
          echo "Fetching workflow run id…"

          # Poll once per second, but give up eventually instead of spinning
          # until the job itself times out.
          max_attempts=300
          run_id=''
          for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
            if run_info=$(get_latest_workflow_run "$workflow_name" "$ref_name" "$now"); then
              echo "$run_info"
              # `// empty` yields an empty string (not the literal `null`)
              # while the run has not shown up yet.
              run_id=$(jq -r '.[0].databaseId // empty' <<< "$run_info") || run_id=''
              if [[ -n "$run_id" ]]; then
                break
              fi
            else
              echo "Listing workflow runs failed (attempt $attempt/$max_attempts), retrying"
            fi
            sleep 1
          done

          if [[ -z "$run_id" ]]; then
            echo "::error::Timed out waiting for the dispatched $workflow_name run on $ref_name"
            exit 1
          fi

          # Built from the default Actions environment instead of a
          # hard-coded repository so the link is correct wherever this runs.
          run_url="$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$run_id"
          echo "Workflow run: $run_url"
          echo "workflow_run_url=$run_url" >> "$GITHUB_OUTPUT"

      - name: Create PR comment
        # https://github.com/mshick/add-pr-comment
        # NOTE(review): the original version tag was unreadable in this copy
        # of the file; `v2` is the major tag shown in the action's README.
        # Pin to the exact release the project uses if it is known.
        uses: mshick/add-pr-comment@v2
        with:
          # We use `GITHUB_TOKEN` here so there is no chance that we'll trigger another run of this workflow.
          # https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow
          repo-token: ${{ secrets.GITHUB_TOKEN }}
          message-id: "pr-${{ github.event.issue.number }}-${{ github.run_id }}"
          message: |
            Started a full build: ${{ steps.dispatch.outputs.workflow_run_url }}