Skip to content

Commit

Permalink
docs: reduce linkinator config DOC-1346 (#3603)
Browse files Browse the repository at this point in the history
* docs: reduce linkinator concurrency DOC-1346

* docs: create bespoke security bulletins URL checker job DOC-1346

* docs: refine security bulletins task DOC-1346
  • Loading branch information
addetz authored Aug 20, 2024
1 parent bac3019 commit 689d3d3
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 5 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/url-checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ jobs:

- name: URL Checker
run: make verify-url-links-ci

- name: URL Security Bulletins Checker
run: make verify-security-bulletins-links-ci

- name: Post Comment
run: |
Expand Down
49 changes: 45 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ CPUS := $(shell sysctl -n hw.ncpu | awk '{print int($$1 / 2)}')

ALOGLIA_CONFIG=$(shell cat docsearch.dev.config.json | jq -r tostring)

# All *.md files under ./docs with the leading "./" stripped, excluding the
# security-bulletins pages and cve-reports.md — those are scanned separately
# (with low concurrency) by the verify-security-bulletins-links* targets.
# ":=" expands the $(shell ...) pipeline once at parse time; the recursive "="
# it replaces re-ran find/cut/sed on every reference to this variable.
VERIFY_URL_PATHS := $(shell find ./docs -name "*.md" | cut -c 3- | sed -e '/security-bulletins/d' -e '/cve-reports/d')

help: ## Display this help
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[0m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
Expand Down Expand Up @@ -172,7 +175,7 @@ pdf-local: ## Generate PDF from local docs
verify-url-links:
@echo "Checking for broken external URLs in markdown files..."
rm link_report.csv || echo "No report exists. Proceeding to scan step"
@npx linkinator "docs/**/*.md" --concurrency 50 --markdown --recurse --timeout 100000 --retry --retry-errors-jitter --retry-errors-count 5 \
@npx linkinator $(VERIFY_URL_PATHS) --concurrency 50 --markdown --recurse --timeout 100000 --retry --retry-errors-jitter --retry-errors-count 5 \
--skip "^https:\/\/docs\.spectrocloud\.com.*$$" \
--skip "^https:\/\/docs\.spectrocloud\.com\/.*\/supplemental\-packs$$" \
--skip "^http:\/\/docs\.spectrocloud\.com.*$$" \
Expand All @@ -182,15 +185,33 @@ verify-url-links:
--skip "\.(jpg|jpeg|png|gif|webp)$$" \
--skip "https:\/\/linux\.die\.net\/man\/.*$$" \
--skip "https:\/\/mysql\.com\/.*\.*$$" \
--skip "https:\/\/dev\.mysql\.com\/doc/\.*$$" \
--skip "https:\/\/dev\.mysql\.com\/doc\/.*$$" \
--format csv > temp_report.csv && sleep 2
@grep -E 'https?://' temp_report.csv > filtered_report.csv
@grep -E ',[[:space:]]*([4-9][0-9]{2}|[0-9]{4,}),' filtered_report.csv > link_report.csv && rm temp_report.csv filtered_report.csv

# Bespoke checker for the security bulletins + CVE report pages, which are
# excluded from VERIFY_URL_PATHS. NOTE(review): --concurrency 1 is presumably
# to avoid rate limiting by the advisory/CVE hosts these pages link to —
# confirm (DOC-1346). Output: link_report-style CSV in link_sec_bul_report.csv.
verify-security-bulletins-links: ## Check for broken URLs in security bulletins and CVE report markdown files
	@echo "Checking for broken URLs in security-bulletins markdown files..."
	rm link_sec_bul_report.csv || echo "No security bulletins report exists. Proceeding to scan step"
	@npx linkinator "docs/docs-content/security-bulletins/**/*.md" "docs/docs-content/security-bulletins/*.md" "docs/docs-content/unlisted/cve-reports.md" --concurrency 1 --markdown --recurse --timeout 100000 --retry --retry-errors-jitter --retry-errors-count 5 \
	--skip "^https:\/\/docs\.spectrocloud\.com.*$$" \
	--skip "^https:\/\/docs\.spectrocloud\.com\/.*\/supplemental\-packs$$" \
	--skip "^http:\/\/docs\.spectrocloud\.com.*$$" \
	--skip "^https:\/\/software-private\.spectrocloud\.com.*$$" \
	--skip "^\/.*\.md$$" \
	--skip "!\[.*\]\(.*\)$$" \
	--skip "\.(jpg|jpeg|png|gif|webp)$$" \
	--skip "https:\/\/linux\.die\.net\/man\/.*$$" \
	--skip "https:\/\/mysql\.com\/.*\.*$$" \
	--skip "https:\/\/dev\.mysql\.com\/doc\/.*$$" \
	--format csv > temp_sec_bul_report.csv && sleep 2
	@# Keep only rows containing a URL and a status of 400+ (or a 4+ digit
	@# status). "|| true" keeps the target from failing when grep selects no
	@# lines (exit 1) — i.e. the success case of zero broken links — and the
	@# unconditional "rm -f" ensures the temp files are removed either way.
	@grep -E 'https?://' temp_sec_bul_report.csv > filtered_sec_bul_report.csv || true
	@grep -E ',[[:space:]]*([4-9][0-9]{2}|[0-9]{4,}),' filtered_sec_bul_report.csv > link_sec_bul_report.csv || true
	@rm -f temp_sec_bul_report.csv filtered_sec_bul_report.csv

verify-url-links-ci: ## Check for broken URLs in production in a GitHub Actions CI environment
@echo "Checking for broken external URLs in CI environment..."
rm link_report.json || echo "No report exists. Proceeding to scan step"
@npx linkinator "docs/**/*.md" --concurrency 50 --markdown --recurse --timeout 100000 --retry-errors-jitter --retry --retry-errors-count 5 \
@npx linkinator $(VERIFY_URL_PATHS) --concurrency 50 --markdown --recurse --timeout 100000 --retry --retry-errors-jitter --retry-errors-count 5 \
--skip "^https:\/\/docs\.spectrocloud\.com.*$$" \
--skip "^https:\/\/docs\.spectrocloud\.com\/.*\/supplemental\-packs$$" \
--skip "^http:\/\/docs\.spectrocloud\.com.*$$" \
Expand All @@ -200,13 +221,33 @@ verify-url-links-ci: ## Check for broken URLs in production in a GitHub Actions
--skip "\.(jpg|jpeg|png|gif|webp)$$" \
--skip "https:\/\/linux\.die\.net\/man\/.*$$" \
--skip "https:\/\/mysql\.com\/.*\.*$$" \
--skip "https:\/\/dev\.mysql\.com\/doc/\.*$$" \
--skip "https:\/\/dev\.mysql\.com\/doc\/.*$$" \
--format json > temp_report.json
@# Use jq to filter out links that do not start with http or https and keep only broken links
@jq '[.links[] | select(.url | test("^https?://")) | select(.status >= 400)]' temp_report.json > filtered_report.json
@rm temp_report.json
@mv filtered_report.json scripts/link_report.json

# CI variant of the security bulletins checker: same scan, but emits JSON and
# stores it under scripts/ so scripts/url-checker.sh can post the broken links
# as a PR comment (see the URL Security Bulletins Checker workflow step).
# NOTE(review): --concurrency 1 is presumably to avoid rate limiting by the
# advisory/CVE hosts these pages link to — confirm (DOC-1346).
verify-security-bulletins-links-ci: ## Check for broken URLs in security bulletins markdown files in a GitHub Actions CI environment
	@echo "Checking for broken URLs in security-bulletins markdown files in CI environment..."
	rm link_sec_bul_report.json || echo "No security bulletins report exists. Proceeding to scan step"
	@npx linkinator "docs/docs-content/security-bulletins/**/*.md" "docs/docs-content/security-bulletins/*.md" "docs/docs-content/unlisted/cve-reports.md" --concurrency 1 --markdown --recurse --timeout 100000 --retry --retry-errors-jitter --retry-errors-count 5 \
	--skip "^https:\/\/docs\.spectrocloud\.com.*$$" \
	--skip "^https:\/\/docs\.spectrocloud\.com\/.*\/supplemental\-packs$$" \
	--skip "^http:\/\/docs\.spectrocloud\.com.*$$" \
	--skip "^https:\/\/software-private\.spectrocloud\.com.*$$" \
	--skip "^\/.*\.md$$" \
	--skip "!\[.*\]\(.*\)$$" \
	--skip "\.(jpg|jpeg|png|gif|webp)$$" \
	--skip "https:\/\/linux\.die\.net\/man\/.*$$" \
	--skip "https:\/\/mysql\.com\/.*\.*$$" \
	--skip "https:\/\/dev\.mysql\.com\/doc\/.*$$" \
	--format json > temp_sec_bul_report.json
	@# Use jq to filter out links that do not start with http or https and keep only broken links
	@jq '[.links[] | select(.url | test("^https?://")) | select(.status >= 400)]' temp_sec_bul_report.json > filtered_sec_bul_report.json
	@rm temp_sec_bul_report.json
	@mv filtered_sec_bul_report.json scripts/link_sec_bul_report.json

###@ Image Formatting

format-images: ## Format images
Expand Down
17 changes: 16 additions & 1 deletion scripts/url-checker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,11 @@ echo "Pull request number: $PR_NUMBER"

# Read the two broken-link reports produced by the CI Makefile targets
# (verify-url-links-ci and verify-security-bulletins-links-ci).
# NOTE(review): the Makefile moves these reports into scripts/ — this cat
# assumes the script runs with that directory as the working directory;
# confirm against the workflow.
JSON_CONTENT=$(cat link_report.json)
JSON_SEC_BUL_CONTENT=$(cat link_sec_bul_report.json)


# Exit early only when BOTH reports are empty — a broken link in either
# report should still produce a PR comment.
if [[ -z "$JSON_CONTENT" ]] && [[ -z "$JSON_SEC_BUL_CONTENT" ]]; then
echo "No broken links found"
exit 0
fi
Expand All @@ -57,6 +58,20 @@ for link in $(echo "${JSON_CONTENT}" | jq -c '.[]'); do
COMMENT="${COMMENT}\n\n:link: Broken URL: ${url} \n:red_circle: State: ${state} \n:arrow_up: Parent Page: ${parent}\n\n"
done

# Append each broken link from the security bulletins report to the PR
# comment, mirroring the loop above for the main report.
# NOTE(review): the unquoted $(... | jq -c '.[]') relies on shell word
# splitting and only yields one item per link while each compact JSON object
# contains no whitespace — confirm this holds for all report fields.
for link in $(echo "${JSON_SEC_BUL_CONTENT}" | jq -c '.[]'); do
url=$(echo "${link}" | jq -r '.url')
status=$(echo "${link}" | jq -r '.status')
state=$(echo "${link}" | jq -r '.state')
parent=$(echo "${link}" | jq -r '.parent')

# Increment counter for broken links if status is not "200".
# (The Makefile's jq already filtered entries to status >= 400, so this
# normally increments for every entry in the report.)
if [[ "$status" != "200" ]]; then
((BROKEN_LINK_COUNT++))
fi

COMMENT="${COMMENT}\n\n:link: Broken URL: ${url} \n:red_circle: State: ${state} \n:arrow_up: Parent Page: ${parent}\n\n"
done

# Check if no broken links are found
if [[ "$BROKEN_LINK_COUNT" -eq 0 ]]; then
COMMENT=":tada: No broken external links found in the production report :tada:\n\nGreat job team! Keep up the good work!\n\nSource: :github: - librarium"
Expand Down

0 comments on commit 689d3d3

Please sign in to comment.