Skip to content

Commit

Permalink
docs: add clean up images job DOC-1232 (#4409)
Browse files Browse the repository at this point in the history
* docs: add clean up images job  DOC-1232

* docs: add on push for testing

* docs: add npm ci step

* docs: add fetch in script

* docs: add debug lines

* docs: adjust github action flow

* docs: add backport labels

* docs: remove results files from commit

* docs: add PR environment variable

* docs: add slack notification

* docs: remove on push trigger

* docs: adjust slack step if check

* docs: add push for final testing

* docs: remove push trigger before merge

(cherry picked from commit c272eaf)
  • Loading branch information
addetz committed Oct 23, 2024
1 parent 5f0fded commit 40ee2ef
Show file tree
Hide file tree
Showing 4 changed files with 173 additions and 1 deletion.
112 changes: 112 additions & 0 deletions .github/workflows/clean-up-unused-images.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# Housekeeping workflow: runs `make find-unused-images` and, when the scan
# reports unused images, opens a pull request against master that deletes them
# and posts the PR link to Slack.
name: Clean Up Unused Images

on:
  schedule:
    # On the first of every month at 2 am (GitHub evaluates cron in UTC)
    - cron: '0 2 1 * *'
  workflow_dispatch:

concurrency:
  group: clean-up-images-${{ github.ref }}
  cancel-in-progress: true

jobs:
  find_unused_images:
    runs-on: ubuntu-latest

    steps:
      - name: Retrieve Credentials
        id: import-secrets
        # NOTE(review): the exact pinned version was lost when this file was
        # extracted; restore the project's pinned tag if it differs from v3.
        uses: hashicorp/vault-action@v3
        with:
          url: https://vault.prism.spectrocloud.com
          method: approle
          roleId: ${{ secrets.VAULT_ROLE_ID }}
          secretId: ${{ secrets.VAULT_SECRET_ID }}
          secrets: /providers/github/organizations/spectrocloud/token?org_name=spectrocloud token | VAULT_GITHUB_TOKEN

      - id: checkout
        name: Checkout Repository
        uses: actions/checkout@v4
        with:
          token: ${{ steps.import-secrets.outputs.VAULT_GITHUB_TOKEN }}

      - name: Setup Nodejs
        uses: actions/setup-node@v4
        with:
          node-version: "20"

      - run: npm ci

      # Writes unused_images.json and evaluated_branches.json to the workspace.
      - name: Find unused images
        run: make find-unused-images

      - name: Set Git User
        # see: https://github.com/actions/checkout/issues/13#issuecomment-724415212
        run: |
          git config --global user.name "github-actions[bot]"
          git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"

      - name: Create PR with unused images
        run: |
          # Stop early (successfully) when the scan found nothing to delete.
          # GITHUB_CREATED_CLEANUP_PR stays unset, so the Slack step is skipped.
          unused_image_count=$(wc -l < unused_images.json)
          if [ "$unused_image_count" -eq 0 ]; then
            echo "No images found to remove."
            exit 0
          fi
          # Ensure that we are on master.
          git checkout master
          # Create a new branch.
          branch_name="clean-up-unused-images-$(date +%Y%m%d%H%M%S)"
          git checkout -b "$branch_name"
          # Remove all the images identified as unused.
          # Read line by line so paths are never word-split or glob-expanded.
          while IFS= read -r img; do
            [ -n "$img" ] || continue
            rm "static/assets/docs/images/$img"
          done < unused_images.json
          # Construct backport labels.
          # evaluated_branches.json holds a space-separated branch list, so
          # word splitting is intentional here.
          backport_labels="auto-backport"
          for branch in $(cat evaluated_branches.json); do
            if [[ $branch =~ version-[0-9]+(-[0-9]+)*$ ]]; then
              backport_labels+=",backport-$branch"
            fi
          done
          # Clean up results file.
          rm unused_images.json
          rm evaluated_branches.json
          # Commit and push branch
          git add .
          git commit -m "docs: clean up unused images"
          git push origin "$branch_name"
          # Create the pull request
          pr_body='
          ## Describe the Change
          This PR removes images identified as unused across all our branches.
          The images are identified using `scripts/find-unused-images.sh` script.
          Please review this PR carefully before merging it.'
          output=$(gh pr create --base master --title "docs: clean up librarium unused images " --body "$pr_body" --label "$backport_labels")
          pr_url=$(echo "$output" | grep -o "https://[^ ]*")
          echo "PR successfully created $pr_url."
          # Export the PR URL so the Slack step can gate on it and link to it.
          echo "GITHUB_CREATED_CLEANUP_PR=$pr_url" >> "$GITHUB_ENV"
        env:
          GH_TOKEN: ${{ steps.import-secrets.outputs.VAULT_GITHUB_TOKEN }}

      - name: Slack Notification
        # Only notify when the previous step actually created a PR.
        if: ${{ env.GITHUB_CREATED_CLEANUP_PR != ''}}
        # NOTE(review): the exact pinned version was lost when this file was
        # extracted; restore the project's pinned tag if it differs from v2.
        uses: rtCamp/action-slack-notify@v2
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_PRIVATE_TEAM_WEBHOOK }}
          SLACK_USERNAME: "spectromate"
          SLACK_ICON_EMOJI: ":ok_hand:"
          SLACK_COLOR: ${{ job.status }}
          SLACKIFY_MARKDOWN: true
          ENABLE_ESCAPES: true
          SLACK_MESSAGE: 'A new PR with unused images to clean up was created. Please review ${{env.GITHUB_CREATED_CLEANUP_PR}} for more details.'
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,5 @@ static/img/packs
vale/styles/spectrocloud/
vale/styles/spectrocloud-docs-internal/
vale/styles/config/vocabularies/spectrocloud-vocab

unused_images.json
7 changes: 6 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ clean-visuals:
@echo "Cleaning visual regression tests"

rm -rf test-results/ playwright-report/ screenshots/


##@ npm Targets

Expand Down Expand Up @@ -221,6 +220,12 @@ format-images: ## Format images
@echo "formatting images in /static/assets/docs/images/ folder"
./scripts/compress-convert-images.sh

###@ Find unused image assets

find-unused-images: ## Find unused image assets
	@echo "Find unused image assets"
	./scripts/find-unused-images.sh

###@ Generate _partials/index.ts required to automatic partials usage.

generate-partials: ## Generate
Expand Down
53 changes: 53 additions & 0 deletions scripts/find-unused-images.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/bin/bash
#
# Finds .webp and .gif assets under static/assets/docs/images that are not
# referenced by any Markdown file in docs/ or any MDX file in _partials/ on
# master or on any version-* branch.
#
# Outputs (written to the current directory):
#   unused_images.json      one unused image path per line, relative to
#                           static/assets/docs/images (with a leading slash)
#   evaluated_branches.json the space-separated list of branches scanned
#
# Side effect: the script checks out every evaluated branch in turn and leaves
# the working tree on the last one.

# Enable error handling
set -e

# Create a list of all the images we have and save it to a json.
# Trim the leading static/assets/docs/images prefix (anchored, so the same
# text appearing deeper in a path is left alone).
find static/assets/docs/images -type f \( -name "*.gif" -o -name "*.webp" \) ! -name ".DS_STORE" ! -name ".DS_Store" | sed 's|^static/assets/docs/images||' > all_images.json
image_count=$(wc -l < all_images.json)
echo "Detected $image_count .webp and .gif assets in static/assets/docs/images..."

# Fetch all branches
git fetch --all

# List all the version branches.
# --format prints bare ref names (no "* " current-branch marker that would
# glob-expand), and `|| true` keeps `set -e` from aborting when no version
# branch exists.
branches="master"
version_branches=$(git branch -a --format='%(refname:short)' | grep -E 'version-[0-9]+(-[0-9]+)*$' || true)
for version_branch in $version_branches; do
  # Remove the remote prefix (if any)
  version_branch=${version_branch#origin/}

  # A branch can be listed both locally and as a remote; evaluate it once.
  case " $branches " in
    *" $version_branch "*) continue ;;
  esac

  branches+=" $version_branch"
done

echo "Evaluating the following branches for image usage: { $branches }"
echo "$branches" > evaluated_branches.json

# Writes every line under directory $1, in files named like $2, that mentions
# a .webp or .gif to file $3. A missing directory yields an empty result
# instead of aborting the script.
collect_image_references() {
  local search_dir=$1 name_pattern=$2 out_file=$3

  if [ ! -d "$search_dir" ]; then
    : > "$out_file"
    return 0
  fi
  # Use `\;` rather than `+`: with `+`, a grep batch without matches would make
  # find exit non-zero and trip `set -e`.
  find "$search_dir" -type f -name "$name_pattern" -exec grep -Hn -E "\.webp|\.gif" {} \; > "$out_file"
}

# Every image starts out as a removal candidate; each branch then drops the
# candidates it references.
cp all_images.json candidate_images.json

for current_branch in $branches; do
  git checkout "$current_branch"

  collect_image_references docs "*.md" docs_used_images.json
  collect_image_references _partials "*.mdx" partials_used_images.json
  cat docs_used_images.json partials_used_images.json > used_images.json

  : > remaining_images.json
  while IFS= read -r img; do
    # -F: match the path literally so "." is not treated as a regex wildcard.
    if ! grep -qF -- "$img" used_images.json; then
      printf '%s\n' "$img" >> remaining_images.json
    fi
  done < candidate_images.json
  mv remaining_images.json candidate_images.json
done

# Whatever no branch referenced makes up the list
cp candidate_images.json unused_images.json
unused_image_count=$(wc -l < unused_images.json)
echo "Detected $unused_image_count unused .webp and .gif assets in static/assets/docs/images that can be safely removed."

# Clean up files
rm -f all_images.json
rm -f candidate_images.json
rm -f docs_used_images.json
rm -f partials_used_images.json
rm -f used_images.json

0 comments on commit 40ee2ef

Please sign in to comment.