From 75d3f86de061a568cf287cb80a0c818b822d5d0c Mon Sep 17 00:00:00 2001
From: "vault-token-factory-spectrocloud[bot]" <133815545+vault-token-factory-spectrocloud[bot]@users.noreply.github.com>
Date: Wed, 23 Oct 2024 19:52:24 +0000
Subject: [PATCH] docs: add clean up images job DOC-1232 (#4409) (#4449)

* docs: add clean up images job DOC-1232

* docs: add on push for testing

* docs: add npm ci step

* docs: add fetch in script

* docs: add debug lines

* docs: adjust github action flow

* docs: add backport labels

* docs: remove results files from commit

* docs: add PR environment variable

* docs: add slack notification

* docs: remove on push trigger

* docs: adjust slack step if check

* docs: add pus for final testing

* docs: remove push trigger before merge

(cherry picked from commit c272eaf2bd6eb008e62825cd18de00537f200f96)

Co-authored-by: Adelina Simion <43963729+addetz@users.noreply.github.com>
---

Notes:
    Relative to the cherry-picked commit, the "Create PR with unused images"
    step now tests the unused image count with a real shell test, echoes its
    "No images found" message, and quotes the pushed branch name. Line counts
    are unchanged.

 .github/workflows/clean-up-unused-images.yaml | 112 ++++++++++++++++++
 .gitignore                                    |   2 +
 Makefile                                      |   7 +-
 scripts/find-unused-images.sh                 |  53 +++++++++
 4 files changed, 173 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/clean-up-unused-images.yaml
 create mode 100755 scripts/find-unused-images.sh

diff --git a/.github/workflows/clean-up-unused-images.yaml b/.github/workflows/clean-up-unused-images.yaml
new file mode 100644
index 0000000000..3b031244ea
--- /dev/null
+++ b/.github/workflows/clean-up-unused-images.yaml
@@ -0,0 +1,112 @@
+name: Clean Up Unused Images
+
+on:
+  schedule:
+    # On the first of every month at 2 am
+    - cron: '0 2 1 * *'
+  workflow_dispatch:
+
+concurrency:
+  group: clean-up-images-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  find_unused_images:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Retrieve Credentials
+        id: import-secrets
+        uses: hashicorp/vault-action@v3.0.0
+        with:
+          url: https://vault.prism.spectrocloud.com
+          method: approle
+          roleId: ${{ secrets.VAULT_ROLE_ID }}
+          secretId: ${{ secrets.VAULT_SECRET_ID }}
+          secrets: /providers/github/organizations/spectrocloud/token?org_name=spectrocloud token | VAULT_GITHUB_TOKEN
+
+      - id: checkout
+        name: Checkout Repository
+        uses: actions/checkout@v4
+        with:
+          token: ${{ steps.import-secrets.outputs.VAULT_GITHUB_TOKEN }}
+
+      - name: Setup Nodejs
+        uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+
+      - run: npm ci
+
+      - name: Find unused images
+        run: make find-unused-images
+
+      - name: Set Git User
+        # see: https://github.com/actions/checkout/issues/13#issuecomment-724415212
+        run: |
+          git config --global user.name "github-actions[bot]"
+          git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
+
+      - name: Create PR with unused images
+        run: |
+          unused_image_count=$(wc -l < unused_images.json)
+          if [ "$unused_image_count" -eq 0 ]; then
+            echo "No images found to remove."
+            exit 0
+          fi
+
+          # Ensure that we are on master; the find script leaves the last evaluated branch checked out.
+          git checkout master
+
+          # Create a new branch.
+          branch_name="clean-up-unused-images-$(date +%Y%m%d%H%M%S)"
+          git checkout -b "$branch_name"
+
+          # Remove all the images identified as unused.
+          for img in $(cat unused_images.json); do
+            rm static/assets/docs/images/$img
+          done
+
+          # Construct backport labels.
+          backport_labels="auto-backport"
+          for branch in $(cat evaluated_branches.json); do
+            if [[ $branch =~ version-[0-9]+(-[0-9]+)*$ ]]; then
+              backport_labels+=",backport-$branch"
+            fi
+          done
+
+          # Clean up results files so that "git add ." does not commit them.
+          rm unused_images.json
+          rm evaluated_branches.json
+
+          # Commit and push branch
+          git add .
+          git commit -m "docs: clean up unused images"
+          git push origin "$branch_name"
+
+          # Create the pull request
+          pr_body='
+          ## Describe the Change
+          This PR removes images identified as unused across all our branches.
+          The images are identified using `scripts/find-unused-images.sh` script.
+          Please review this PR carefully before merging it.'
+
+          output=$(gh pr create --base master --title "docs: clean up librarium unused images " --body "$pr_body" --label "$backport_labels")
+          pr_url=$(echo "$output" | grep -o "https://[^ ]*")
+          echo "PR successfully created $pr_url."
+
+          echo "GITHUB_CREATED_CLEANUP_PR=$pr_url" >> $GITHUB_ENV
+        env:
+          GH_TOKEN: ${{ steps.import-secrets.outputs.VAULT_GITHUB_TOKEN }}
+
+      - name: Slack Notification
+        if: ${{ env.GITHUB_CREATED_CLEANUP_PR != ''}}
+        uses: rtCamp/action-slack-notify@v2.3.0
+        env:
+          SLACK_WEBHOOK: ${{ secrets.SLACK_PRIVATE_TEAM_WEBHOOK }}
+          SLACK_USERNAME: "spectromate"
+          SLACK_ICON_EMOJI: ":ok_hand:"
+          SLACK_COLOR: ${{ job.status }}
+          SLACKIFY_MARKDOWN: true
+          ENABLE_ESCAPES: true
+          SLACK_MESSAGE: 'A new PR with unused images to clean up was created. Please review ${{env.GITHUB_CREATED_CLEANUP_PR}} for more details.'
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 8f9c27cd5e..f11d40f6bd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -78,3 +78,5 @@ static/img/packs
 vale/styles/spectrocloud/
 vale/styles/spectrocloud-docs-internal/
 vale/styles/config/vocabularies/spectrocloud-vocab
+
+unused_images.json
diff --git a/Makefile b/Makefile
index 98e3b5f562..e140f997e2 100644
--- a/Makefile
+++ b/Makefile
@@ -65,7 +65,6 @@ clean-visuals:
 	@echo "Cleaning visual regression tests"
 
 	rm -rf test-results/ playwright-report/ screenshots/
-
 
 ##@ npm Targets
 
@@ -221,6 +220,12 @@ format-images: ## Format images
 	@echo "formatting images in /static/assets/docs/images/ folder"
 	./scripts/compress-convert-images.sh
 
+###@ Find unused images assets
+
+find-unused-images:
+	@echo "Find unused image assets"
+	./scripts/find-unused-images.sh
+
 ###@ Generate _partials/index.ts required to automatic partials usage.
 
 generate-partials: ## Generate
diff --git a/scripts/find-unused-images.sh b/scripts/find-unused-images.sh
new file mode 100755
index 0000000000..0adc4a4d62
--- /dev/null
+++ b/scripts/find-unused-images.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# Enable error handling
+set -e
+
+# Create a list of all the images we have and save it to a json.
+# Trim the path static/assets/docs/images.
+find static/assets/docs/images -type f \( -name "*.gif" -o -name "*.webp" \) ! -name ".DS_STORE" ! -name ".DS_Store" | sed 's|static/assets/docs/images||g' > all_images.json
+image_count=$(wc -l < all_images.json)
+echo "Detected $image_count .webp and .gif assets in static/assets/docs/images..."
+
+# Fetch all branches
+git fetch --all
+
+# List all the version branches
+branches="master"
+version_branches=$(git branch -a | grep -E 'version-[0-9]+(-[0-9]+)*$')
+for version_branch in $version_branches; do
+  # Remove leading spaces and remote prefix (if any)
+  version_branch=$(echo $version_branch | sed 's/ *//;s/remotes\/origin\///')
+
+  branches+=" $version_branch"
+done
+
+echo "Evaluating the following branches for image usage: { $branches }"
+echo "$branches" > evaluated_branches.json
+
+for current_branch in $branches; do
+  git checkout $current_branch
+
+  find docs -type f -name "*.md" -exec grep -Hn -E "\.webp|\.gif" {} \; > docs_used_images.json
+  find _partials -type f -name "*.mdx" -exec grep -Hn -E "\.webp|\.gif" {} \; > partials_used_images.json
+  cat docs_used_images.json partials_used_images.json > used_images.json
+
+  line_number=1
+  for img in $(cat all_images.json); do
+    if grep -q $img used_images.json; then
+      sed -i "${line_number}s|.*|${img},FOUND_USED|" all_images.json
+    fi
+    ((line_number++))
+  done
+done
+
+# Remove all marked used files to make up the list
+sed '/FOUND_USED/d' all_images.json > unused_images.json
+unused_image_count=$(wc -l < unused_images.json)
+echo "Detected $unused_image_count unused webp assets in static/assets/docs/images that can be safely removed."
+
+# Clean up files
+rm all_images.json
+rm docs_used_images.json
+rm partials_used_images.json
+rm used_images.json