Update on gsg #154
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright (C) 2024 Intel Corporation | |
# SPDX-License-Identifier: Apache-2.0 | |
name: Check Paths and Hyperlinks | |
on: | |
pull_request: | |
branches: [main] | |
types: [opened, reopened, ready_for_review, synchronize] | |
jobs: | |
check-the-validity-of-hyperlinks-in-README: | |
runs-on: ubuntu-latest | |
steps: | |
- name: Clean Up Working Directory | |
run: sudo rm -rf ${{github.workspace}}/* | |
- name: Checkout Repo docs | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
- name: Check the Validity of Hyperlinks | |
run: | | |
cd ${{github.workspace}} | |
fail="FALSE" | |
# url_lines=$(grep -Eo '\]\(http[s]?://[^)]+\)' --include='*.md' -r .|grep -Ev 'linkedin') | |
merged_commit=$(git log -1 --format='%H') | |
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')" | |
if [ -n "$changed_files" ]; then | |
for changed_file in $changed_files; do | |
url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'linkedin') || true | |
if [ -n "$url_lines" ]; then | |
for url_line in $url_lines; do | |
url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//') | |
path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-) | |
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url") | |
if [ "$response" -ne 200 ]; then | |
echo "**********Validation failed, status code: $response, try again**********" | |
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") | |
if [ "$response_retry" -eq 200 ]; then | |
echo "*****Retry successful*****" | |
else | |
urls_line+=("$url_line") | |
echo "Status code: $response_retry, Link $url validation failed, will retry later." | |
fi | |
fi | |
done | |
fi | |
done | |
fi | |
echo "**************Start Retry**************" | |
for link in "${urls_line[@]}"; do | |
url=$(echo "$link"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//') | |
path=$(echo "$link"|cut -d':' -f1 | cut -d'/' -f2-) | |
attempt_num=1 | |
while [ $attempt_num -le 5 ]; do | |
do_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") | |
if [ "$do_retry" -eq 200 ]; then | |
echo "$url Retry successful" | |
break | |
else | |
echo "$url Validation failed, retrying..." | |
((attempt_num++)) | |
sleep 10 | |
fi | |
done | |
if [ $attempt_num -gt 5 ]; then | |
echo "Invalid link from ${{github.workspace}}/$path: $url status code: $do_retry" | |
fail="TRUE" | |
fi | |
done | |
if [[ "$fail" == "TRUE" ]]; then | |
exit 1 | |
else | |
echo "All hyperlinks are valid." | |
fi | |
shell: bash | |
check-the-validity-of-relative-path: | |
runs-on: ubuntu-latest | |
steps: | |
- name: Clean up Working Directory | |
run: sudo rm -rf ${{github.workspace}}/* | |
- name: Checkout Repo docs | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
- name: Checking Relative Path Validity | |
run: | | |
cd ${{github.workspace}} | |
fail="FALSE" | |
repo_name=${{ github.event.pull_request.head.repo.full_name }} | |
if [ "$(echo "$repo_name"|cut -d'/' -f1)" != "opea-project" ]; then | |
owner=$(echo "${{ github.event.pull_request.head.repo.full_name }}" |cut -d'/' -f1) | |
branch="https://github.com/$owner/docs/tree/${{ github.event.pull_request.head.ref }}" | |
else | |
branch="https://github.com/opea-project/docs/blob/${{ github.event.pull_request.head.ref }}" | |
fi | |
link_head="https://github.com/opea-project/docs/blob/main" | |
merged_commit=$(git log -1 --format='%H') | |
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')" | |
png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r .|grep -Ev 'http'|grep -Ev 'mailto'|grep -Ev 'portal.azure.com') | |
if [ -n "$png_lines" ]; then | |
for png_line in $png_lines; do | |
refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-) | |
png_path=$(echo "$png_line"|cut -d '(' -f2 | cut -d ')' -f1) | |
if [[ "${png_path:0:1}" == "/" ]]; then | |
check_path="."$(echo "$png_path" | cut -d '#' -f1) | |
elif [[ "$png_path" == *#* ]]; then | |
relative_path=$(echo "$png_path" | cut -d '#' -f1) | |
if [ -n "$relative_path" ]; then | |
check_path=$(dirname "$refer_path")/$relative_path | |
png_path=$(echo "$png_path" | awk -F'#' '{print "#" $2}') | |
else | |
check_path=$refer_path | |
fi | |
else | |
check_path=$(dirname "$refer_path")/$png_path | |
fi | |
if [ -e "$check_path" ]; then | |
real_path=$(realpath $check_path) | |
if [[ "$png_line" == *#* ]]; then | |
if [ -n "changed_files" ] && echo "$changed_files" | grep -q "^${refer_path}$"; then | |
url_dev=$branch$(echo "$real_path" | sed 's|.*/docs||')$png_path | |
response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev") | |
if [ "$response" -ne 200 ]; then | |
echo "**********Validation failed, try again**********" | |
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url_dev") | |
if [ "$response_retry" -eq 200 ]; then | |
echo "*****Retry successfully*****" | |
else | |
echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path, link: $url_dev" | |
fail="TRUE" | |
fi | |
else | |
echo "Validation succeed $png_line" | |
fi | |
fi | |
fi | |
else | |
echo "$check_path does not exist $png_line" | |
fail="TRUE" | |
fi | |
done | |
fi | |
if [[ "$fail" == "TRUE" ]]; then | |
exit 1 | |
else | |
echo "All relative links valid." | |
fi | |
shell: bash |