From 075f59b57e7c43572dbd89192f9f9f771e2da7a3 Mon Sep 17 00:00:00 2001
From: addetz <43963729+addetz@users.noreply.github.com>
Date: Mon, 16 Sep 2024 12:24:12 +0100
Subject: [PATCH] docs: fix broken links and adjust linkinator config DOC-1382

---
Reviewer notes (text between the "---" line and the diffstat is not part of
the commit):

* linkinator "skip" entries are JSON strings that hold regular expressions.
  Literal dots, brackets and parentheses are therefore escaped with a doubled
  backslash, and the end anchor is a single "$" (the doubled "$$" was only
  needed while the patterns lived inside a Makefile recipe).
* The two *-ci targets now remove the previous report from scripts/, which is
  the location their final `mv` writes to.
* Indentation and blank context lines in this patch were reconstructed from
  the hunk line counts; apply with --ignore-whitespace if a context line does
  not match.

 .github/workflows/url-checks.yaml             |  4 +-
 Makefile                                      | 88 ++++++-------------
 .../legal-licenses/oss-licenses.md            |  4 +-
 .../airgap/offline-docs.md                    |  6 +-
 linkinator/linkinator-ci.config.json          | 25 ++++++
 .../linkinator-rate-limit-ci.config.json      | 25 ++++++
 linkinator/linkinator-rate-limit.config.json  | 26 ++++++
 linkinator/linkinator.config.json             | 25 ++++++
 scripts/url-checker.sh                        |  6 +-
 9 files changed, 137 insertions(+), 72 deletions(-)
 create mode 100644 linkinator/linkinator-ci.config.json
 create mode 100644 linkinator/linkinator-rate-limit-ci.config.json
 create mode 100644 linkinator/linkinator-rate-limit.config.json
 create mode 100644 linkinator/linkinator.config.json

diff --git a/.github/workflows/url-checks.yaml b/.github/workflows/url-checks.yaml
index 3d9bb8d95e1..290bee96414 100644
--- a/.github/workflows/url-checks.yaml
+++ b/.github/workflows/url-checks.yaml
@@ -29,8 +29,8 @@ jobs:
       - name: URL Checker
         run: make verify-url-links-ci
 
-      - name: URL Security Bulletins Checker
-        run: make verify-security-bulletins-links-ci
+      - name: URL Rate Limit Checker
+        run: make verify-rate-limited-links-ci
 
       - name: Post Comment
         run: |
diff --git a/Makefile b/Makefile
index 52cd169137e..383eb3573c5 100644
--- a/Makefile
+++ b/Makefile
@@ -13,8 +13,14 @@ CPUS := $(shell sysctl -n hw.ncpu | awk '{print int($$1 / 2)}')
 ALOGLIA_CONFIG=$(shell cat docsearch.dev.config.json | jq -r tostring)
 
 # Find all *.md files in docs, cut the prefix ./
-# Remove all security-bulletins and cve-reports.md
-VERIFY_URL_PATHS=$(shell find ./docs -name "*.md" | cut -c 3- | sed '/security-bulletins/d' | sed '/cve-reports/d' )
+# Remove all security-bulletins and cve-reports.md because they are rate limited by nvd.nist.gov
+# Remove oss-licenses.md because it is rate limited by npmjs.com
+VERIFY_URL_PATHS=$(shell find ./docs -name "*.md" | cut -c 3- | sed '/security-bulletins/d' | sed '/cve-reports/d' | sed '/oss-licenses/d')
+
+RATE_LIMITED_FILES_LIST:="docs/docs-content/security-bulletins/**/*.md" \
+	"docs/docs-content/security-bulletins/*.md" \
+	"docs/docs-content/unlisted/cve-reports.md" \
+	"docs/docs-content/legal-licenses/oss-licenses.md"
 
 help: ## Display this help
 	@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[0m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
@@ -175,78 +181,36 @@ pdf-local: ## Generate PDF from local docs
 verify-url-links:
 	@echo "Checking for broken external URLs in markdown files..."
 	rm link_report.csv || echo "No report exists. Proceeding to scan step"
-	@npx linkinator $(VERIFY_URL_PATHS) --concurrency 50 --markdown --recurse --timeout 100000 --retry --retry-errors-jitter --retry-errors-count 5 \
-	--skip "^https:\/\/docs\.spectrocloud\.com.*$$" \
-	--skip "^https:\/\/docs\.spectrocloud\.com\/.*\/supplemental\-packs$$" \
-	--skip "^http:\/\/docs\.spectrocloud\.com.*$$" \
-	--skip "^https:\/\/software-private\.spectrocloud\.com.*$$" \
-	--skip "^\/.*\.md$$" \
-	--skip "!\[.*\]\(.*\)$$" \
-	--skip "\.(jpg|jpeg|png|gif|webp)$$" \
-	--skip "https:\/\/linux\.die\.net\/man\/.*$$" \
-	--skip "https:\/\/mysql\.com\/.*\.*$$" \
-	--skip "https:\/\/dev\.mysql\.com\/doc\/.*$$" \
-	--format csv > temp_report.csv && sleep 2
+	@npx linkinator $(VERIFY_URL_PATHS) --config ./linkinator/linkinator.config.json > temp_report.csv && sleep 2
 	@grep -E 'https?://' temp_report.csv > filtered_report.csv
 	@grep -E ',[[:space:]]*([4-9][0-9]{2}|[0-9]{4,}),' filtered_report.csv > link_report.csv && rm temp_report.csv filtered_report.csv
 
-verify-security-bulletins-links:
-	@echo "Checking for broken URLs in security-bulletins markdown files..."
-	rm link_sec_bul_report.csv || echo "No security bulletins report exists. Proceeding to scan step"
-	@npx linkinator "docs/docs-content/security-bulletins/**/*.md" "docs/docs-content/security-bulletins/*.md" "docs/docs-content/unlisted/cve-reports.md" --concurrency 1 --markdown --recurse --timeout 100000 --retry --retry-errors-jitter --retry-errors-count 5 \
-	--skip "^https:\/\/docs\.spectrocloud\.com.*$$" \
-	--skip "^https:\/\/docs\.spectrocloud\.com\/.*\/supplemental\-packs$$" \
-	--skip "^http:\/\/docs\.spectrocloud\.com.*$$" \
-	--skip "^https:\/\/software-private\.spectrocloud\.com.*$$" \
-	--skip "^\/.*\.md$$" \
-	--skip "!\[.*\]\(.*\)$$" \
-	--skip "\.(jpg|jpeg|png|gif|webp)$$" \
-	--skip "https:\/\/linux\.die\.net\/man\/.*$$" \
-	--skip "https:\/\/mysql\.com\/.*\.*$$" \
-	--skip "https:\/\/dev\.mysql\.com\/doc\/.*$$" \
-	--format csv > temp_sec_bul_report.csv && sleep 2
-	@grep -E 'https?://' temp_sec_bul_report.csv > filtered_sec_bul_report.csv
-	@grep -E ',[[:space:]]*([4-9][0-9]{2}|[0-9]{4,}),' filtered_sec_bul_report.csv > link_sec_bul_report.csv && rm temp_sec_bul_report.csv filtered_sec_bul_report.csv
+verify-rate-limited-links:
+	@echo "Checking for broken URLs in security-bulletins and oss-licenses markdown files..."
+	@rm link_rate_limit_report.csv || echo "No rate limited report exists. Proceeding to scan step"
+	@echo "Checking the following paths: $(RATE_LIMITED_FILES_LIST)"
+	@npx linkinator $(RATE_LIMITED_FILES_LIST) --config ./linkinator/linkinator-rate-limit.config.json > temp_rate_limit_report.csv && sleep 2
+	@grep -E 'https?://' temp_rate_limit_report.csv > filtered_rate_limit_report.csv
+	@grep -E ',[[:space:]]*([4-9][0-9]{2}|[0-9]{4,}),' filtered_rate_limit_report.csv > link_rate_limit_report.csv && rm temp_rate_limit_report.csv filtered_rate_limit_report.csv
 
 verify-url-links-ci: ## Check for broken URLs in production in a GitHub Actions CI environment
 	@echo "Checking for broken external URLs in CI environment..."
-	rm link_report.json || echo "No report exists. Proceeding to scan step"
-	@npx linkinator $(VERIFY_URL_PATHS) --concurrency 50 --markdown --recurse --timeout 100000 --retry --retry-errors-jitter --retry-errors-count 5 \
-	--skip "^https:\/\/docs\.spectrocloud\.com.*$$" \
-	--skip "^https:\/\/docs\.spectrocloud\.com\/.*\/supplemental\-packs$$" \
-	--skip "^http:\/\/docs\.spectrocloud\.com.*$$" \
-	--skip "^https:\/\/software-private\.spectrocloud\.com.*$$" \
-	--skip "^\/.*\.md$$" \
-	--skip "!\[.*\]\(.*\)$$" \
-	--skip "\.(jpg|jpeg|png|gif|webp)$$" \
-	--skip "https:\/\/linux\.die\.net\/man\/.*$$" \
-	--skip "https:\/\/mysql\.com\/.*\.*$$" \
-	--skip "https:\/\/dev\.mysql\.com\/doc\/.*$$" \
-	--format json > temp_report.json
+	@rm scripts/link_report.json || echo "No report exists. Proceeding to scan step"
+	@npx linkinator $(VERIFY_URL_PATHS) --config ./linkinator/linkinator-ci.config.json > temp_report.json
 	@# Use jq to filter out links that do not start with http or https and keep only broken links
 	@jq '[.links[] | select(.url | test("^https?://")) | select(.status >= 400)]' temp_report.json > filtered_report.json
 	@rm temp_report.json
 	@mv filtered_report.json scripts/link_report.json
 
-verify-security-bulletins-links-ci: ## Check for broken URLs in production in a GitHub Actions CI environment
-	@echo "Checking for broken URLs in security-bulletins markdown files in CI environment..."
-	rm link_sec_bul_report.json || echo "No security bulletins report exists. Proceeding to scan step"
-	@npx linkinator "docs/docs-content/security-bulletins/**/*.md" "docs/docs-content/security-bulletins/*.md" "docs/docs-content/unlisted/cve-reports.md" --concurrency 1 --markdown --recurse --timeout 100000 --retry --retry-errors-jitter --retry-errors-count 5 \
-	--skip "^https:\/\/docs\.spectrocloud\.com.*$$" \
-	--skip "^https:\/\/docs\.spectrocloud\.com\/.*\/supplemental\-packs$$" \
-	--skip "^http:\/\/docs\.spectrocloud\.com.*$$" \
-	--skip "^https:\/\/software-private\.spectrocloud\.com.*$$" \
-	--skip "^\/.*\.md$$" \
-	--skip "!\[.*\]\(.*\)$$" \
-	--skip "\.(jpg|jpeg|png|gif|webp)$$" \
-	--skip "https:\/\/linux\.die\.net\/man\/.*$$" \
-	--skip "https:\/\/mysql\.com\/.*\.*$$" \
-	--skip "https:\/\/dev\.mysql\.com\/doc\/.*$$" \
-	--format json > temp_sec_bul_report.json
+verify-rate-limited-links-ci: ## Check for broken URLs in production in a GitHub Actions CI environment
+	@echo "Checking for broken URLs in security-bulletins and oss-licenses markdown files in CI environment..."
+	@rm scripts/link_rate_limit_report.json || echo "No rate limited report exists. Proceeding to scan step"
+	@echo "Checking the following paths: $(RATE_LIMITED_FILES_LIST)"
+	@npx linkinator $(RATE_LIMITED_FILES_LIST) --config ./linkinator/linkinator-rate-limit-ci.config.json > temp_rate_limit_report.json
 	@# Use jq to filter out links that do not start with http or https and keep only broken links
-	@jq '[.links[] | select(.url | test("^https?://")) | select(.status >= 400)]' temp_sec_bul_report.json > filtered_sec_bul_report.json
-	@rm temp_sec_bul_report.json
-	@mv filtered_sec_bul_report.json scripts/link_sec_bul_report.json
+	@jq '[.links[] | select(.url | test("^https?://")) | select(.status >= 400)]' temp_rate_limit_report.json > filtered_rate_limit_report.json
+	@rm temp_rate_limit_report.json
+	@mv filtered_rate_limit_report.json scripts/link_rate_limit_report.json
 
 ###@ Image Formatting
 
diff --git a/docs/docs-content/legal-licenses/oss-licenses.md b/docs/docs-content/legal-licenses/oss-licenses.md
index 3b0f9ff70ee..35aecee3f42 100644
--- a/docs/docs-content/legal-licenses/oss-licenses.md
+++ b/docs/docs-content/legal-licenses/oss-licenses.md
@@ -472,7 +472,7 @@ have any questions or concerns, contact us at support@spectrocloud.com.
 | https://www.npmjs.com/package/query-string | [MIT](https://opensource.org/licenses/MIT) |
 | https://www.npmjs.com/package/react | [MIT](https://opensource.org/licenses/MIT) |
 | https://www.npmjs.com/package/react-calendar | [MIT](https://opensource.org/licenses/MIT) |
-| https://www.npmjs.com/package/react-clipboard.js | [CC0-1.0](https://opensource.org/licenses/CC0-1.0) |
+| https://www.npmjs.com/package/react-clipboard.js | [CC0-1.0](https://creativecommons.org/publicdomain/zero/1.0/legalcode.en) |
 | https://www.npmjs.com/package/react-dev-utils | [MIT](https://opensource.org/licenses/MIT) |
 | https://www.npmjs.com/package/react-dom | [MIT](https://opensource.org/licenses/MIT) |
 | https://www.npmjs.com/package/react-helmet | [MIT](https://opensource.org/licenses/MIT) |
@@ -702,7 +702,7 @@ have any questions or concerns, contact us at support@spectrocloud.com.
 | https://www.npmjs.com/package/locate-path | [MIT](https://opensource.org/licenses/MIT) |
 | https://www.npmjs.com/package/lodash.camelcase | [MIT](https://opensource.org/licenses/MIT) |
 | https://www.npmjs.com/package/lodash.isequalwith | [MIT](https://opensource.org/licenses/MIT) |
-| https://www.npmjs.com/package/lodash.once | [CC0-1.0](https://opensource.org/licenses/CC0-1.0) |
+| https://www.npmjs.com/package/lodash.once | [CC0-1.0](https://creativecommons.org/publicdomain/zero/1.0/legalcode.en) |
 | https://www.npmjs.com/package/lodash.upperfirst | [MIT](https://opensource.org/licenses/MIT) |
 | https://www.npmjs.com/package/loose-envify | [MIT](https://opensource.org/licenses/MIT) |
 | https://www.npmjs.com/package/lru-cache | [ISC](https://opensource.org/licenses/ISC) |
diff --git a/docs/docs-content/vertex/install-palette-vertex/airgap/offline-docs.md b/docs/docs-content/vertex/install-palette-vertex/airgap/offline-docs.md
index a558e0d4252..d2596903192 100644
--- a/docs/docs-content/vertex/install-palette-vertex/airgap/offline-docs.md
+++ b/docs/docs-content/vertex/install-palette-vertex/airgap/offline-docs.md
@@ -45,7 +45,7 @@ The following software must be installed on your system:
 - [tar](https://www.gnu.org/software/tar/) - This is only required if you need to deploy the offline documentation to a
   device without internet access.
 
-- [cosign](https://docs.sigstore.dev/system_config/installation) - Not required unless you want to verify the
+- [cosign](https://docs.sigstore.dev/cosign/system_config/installation/) - Not required unless you want to verify the
   authenticity of the container image. Review the [Container Image Authenticity](#container-image-authenticity) section
   for more information.
 
@@ -113,8 +113,8 @@ image is signed using a cryptographic key pair that is private and stored intern
 documentation repository at
 [**static/cosign.pub**](https://raw.githubusercontent.com/spectrocloud/librarium/master/static/cosign.pub). Use the
 public key to verify the authenticity of the container image. You can learn more about the container image signing
-process by reviewing the [Signing Containers](https://docs.sigstore.dev/signing/signing_with_containers) documentation
-page.
+process by reviewing the [Signing Containers](https://docs.sigstore.dev/cosign/signing/signing_with_containers/)
+documentation page.
 
 :::info
 
diff --git a/linkinator/linkinator-ci.config.json b/linkinator/linkinator-ci.config.json
new file mode 100644
index 00000000000..1e5206d56c4
--- /dev/null
+++ b/linkinator/linkinator-ci.config.json
@@ -0,0 +1,25 @@
+{
+  "concurrency": 50,
+  "markdown": true,
+  "recurse": true,
+  "timeout": 100000,
+  "retry": true,
+  "retryErrors": true,
+  "retryErrorsJitter": 5000,
+  "retryErrorsCount": 5,
+  "format": "json",
+  "skip": [
+    "^https://docs\\.spectrocloud\\.com.*$",
+    "^https://docs\\.spectrocloud\\.com/.*/supplemental-packs$",
+    "^http://docs\\.spectrocloud\\.com.*$",
+    "^https://software-private\\.spectrocloud\\.com.*$",
+    "^/.*\\.md$",
+    "!\\[.*\\]\\(.*\\)$",
+    "\\.(jpg|jpeg|png|gif|webp)$",
+    "https://linux\\.die\\.net/man/.*$",
+    "https://mysql\\.com/.*\\.*$",
+    "https://dev\\.mysql\\.com/doc/.*$"
+  ],
+  "verbosity": "error",
+  "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36"
+}
diff --git a/linkinator/linkinator-rate-limit-ci.config.json b/linkinator/linkinator-rate-limit-ci.config.json
new file mode 100644
index 00000000000..36efd474728
--- /dev/null
+++ b/linkinator/linkinator-rate-limit-ci.config.json
@@ -0,0 +1,25 @@
+{
+  "concurrency": 1,
+  "markdown": true,
+  "recurse": false,
+  "timeout": 100000,
+  "retry": true,
+  "retryErrors": true,
+  "retryErrorsJitter": 5000,
+  "retryErrorsCount": 5,
+  "format": "json",
+  "skip": [
+    "^https://docs\\.spectrocloud\\.com.*$",
+    "^https://docs\\.spectrocloud\\.com/.*/supplemental-packs$",
+    "^http://docs\\.spectrocloud\\.com.*$",
+    "^https://software-private\\.spectrocloud\\.com.*$",
+    "^/.*\\.md$",
+    "!\\[.*\\]\\(.*\\)$",
+    "\\.(jpg|jpeg|png|gif|webp)$",
+    "https://linux\\.die\\.net/man/.*$",
+    "https://mysql\\.com/.*\\.*$",
+    "https://dev\\.mysql\\.com/doc/.*$"
+  ],
+  "verbosity": "error",
+  "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36"
+}
diff --git a/linkinator/linkinator-rate-limit.config.json b/linkinator/linkinator-rate-limit.config.json
new file mode 100644
index 00000000000..8591bf85c37
--- /dev/null
+++ b/linkinator/linkinator-rate-limit.config.json
@@ -0,0 +1,26 @@
+{
+  "concurrency": 1,
+  "markdown": true,
+  "recurse": false,
+  "timeout": 100000,
+  "retry": true,
+  "retryErrors": true,
+  "retryErrorsJitter": 5000,
+  "retryErrorsCount": 5,
+  "format": "csv",
+  "skip": [
+    "^https://docs\\.spectrocloud\\.com.*$",
+    "^https://docs\\.spectrocloud\\.com/.*/supplemental-packs$",
+    "^http://docs\\.spectrocloud\\.com.*$",
+    "^https://software-private\\.spectrocloud\\.com.*$",
+    "^/.*\\.md$",
+    "^/.*\\.md#*$",
+    "!\\[.*\\]\\(.*\\)$",
+    "\\.(jpg|jpeg|png|gif|webp)$",
+    "https://linux\\.die\\.net/man/.*$",
+    "https://mysql\\.com/.*\\.*$",
+    "https://dev\\.mysql\\.com/doc/.*$"
+  ],
+  "verbosity": "error",
+  "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36"
+}
diff --git a/linkinator/linkinator.config.json b/linkinator/linkinator.config.json
new file mode 100644
index 00000000000..89703a6c121
--- /dev/null
+++ b/linkinator/linkinator.config.json
@@ -0,0 +1,25 @@
+{
+  "concurrency": 50,
+  "markdown": true,
+  "recurse": true,
+  "timeout": 100000,
+  "retry": true,
+  "retryErrors": true,
+  "retryErrorsJitter": 5000,
+  "retryErrorsCount": 5,
+  "format": "csv",
+  "skip": [
+    "^https://docs\\.spectrocloud\\.com.*$",
+    "^https://docs\\.spectrocloud\\.com/.*/supplemental-packs$",
+    "^http://docs\\.spectrocloud\\.com.*$",
+    "^https://software-private\\.spectrocloud\\.com.*$",
+    "^/.*\\.md$",
+    "!\\[.*\\]\\(.*\\)$",
+    "\\.(jpg|jpeg|png|gif|webp)$",
+    "https://linux\\.die\\.net/man/.*$",
+    "https://mysql\\.com/.*\\.*$",
+    "https://dev\\.mysql\\.com/doc/.*$"
+  ],
+  "verbosity": "error",
+  "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36"
+}
diff --git a/scripts/url-checker.sh b/scripts/url-checker.sh
index d07701a6617..66747fcfc8b 100755
--- a/scripts/url-checker.sh
+++ b/scripts/url-checker.sh
@@ -28,11 +28,11 @@ echo "Pull request number: $PR_NUMBER"
 
 # Read JSON file contents into a variable
 JSON_CONTENT=$(cat link_report.json)
-JSON_SEC_BUL_CONTENT=$(cat link_sec_bul_report.json)
+JSON_RATE_LIMIT_CONTENT=$(cat link_rate_limit_report.json)
 
 
 # Check if JSON file is empty
-if [[ -z "$JSON_CONTENT" ]] && [[ -z "$JSON_SEC_BUL_CONTENT" ]]; then
+if [[ -z "$JSON_CONTENT" ]] && [[ -z "$JSON_RATE_LIMIT_CONTENT" ]]; then
     echo "No broken links found"
     exit 0
 fi
@@ -58,7 +58,7 @@ for link in $(echo "${JSON_CONTENT}" | jq -c '.[]'); do
     COMMENT="${COMMENT}\n\n:link: Broken URL: ${url} \n:red_circle: State: ${state} \n:arrow_up: Parent Page: ${parent}\n\n"
 done
 
-for link in $(echo "${JSON_SEC_BUL_CONTENT}" | jq -c '.[]'); do
+for link in $(echo "${JSON_RATE_LIMIT_CONTENT}" | jq -c '.[]'); do
     url=$(echo "${link}" | jq -r '.url')
     status=$(echo "${link}" | jq -r '.status')
     state=$(echo "${link}" | jq -r '.state')