diff --git a/.github/workflows/api-checks.yml b/.github/workflows/api-checks.yml index 39f7de03dbd..263bdb75677 100644 --- a/.github/workflows/api-checks.yml +++ b/.github/workflows/api-checks.yml @@ -19,7 +19,7 @@ on: - "docs/api/qiskit-ibm-provider/**/*" - "docs/api/qiskit-ibm-runtime/**/*" - "public/api/**/*" - - "scripts/checkLinks.ts" + - "scripts/checkInternalLinks.ts" - "scripts/lib/links/ignores.ts" jobs: @@ -36,7 +36,7 @@ jobs: run: npm ci - name: Check internal links run: > - npm run check:links -- + npm run check:internal-links -- --qiskit-release-notes --current-apis --dev-apis diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ede2b038e6d..c9d773120df 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -31,7 +31,7 @@ jobs: - name: Spellcheck run: npm run check:spelling - name: Internal link checker - run: npm run check:links + run: npm run check:internal-links - name: Formatting run: npm run check:fmt - name: Typecheck @@ -54,4 +54,4 @@ jobs: if: steps.changed-docs-files.outputs.any_changed == 'true' run: | echo "${{ steps.changed-docs-files.outputs.all_changed_files }}" > changed.txt - npm run check-pages-render -- --from-file changed.txt + npm run check:pages-render -- --from-file changed.txt diff --git a/.github/workflows/weekly-checks.yml b/.github/workflows/weekly-checks.yml index 52fad89e6c9..367fa3f538f 100644 --- a/.github/workflows/weekly-checks.yml +++ b/.github/workflows/weekly-checks.yml @@ -34,7 +34,7 @@ jobs: sleep 20 - name: Check all pages render run: > - npm run check-pages-render -- + npm run check:pages-render -- --non-api --qiskit-release-notes --current-apis @@ -54,10 +54,6 @@ jobs: run: npm ci - name: Check external links run: > - npm run check:links -- - --qiskit-release-notes - --current-apis - --dev-apis - --historical-apis - --skip-broken-historical - --external + npm run check:external-links -- + 'docs/**/*.{md,mdx,ipynb}' + '!docs/api/qiskit/[0-9]*/*' diff --git a/README.md b/README.md index 07963eea9a7..018b3d6ad44 100644 --- a/README.md +++ b/README.md @@ -178,27 +178,35 @@ about it. ## Check for broken links -CI will check for broken links. You can also check locally: +We have two broken link checkers: for internal links and for external links. -```bash -# Only check for internal broken links -npm run check:links +To check internal links: -# Enable the validation of external links -npm run check:links -- --external +```bash +# Only check non-API docs +npm run check:internal-links -# By default, only the non-API docs are checked. You can add any of the -# below arguments to also check API docs. -npm run check:links -- --current-apis --dev-apis --historical-apis --qiskit-release-notes +# You can add any of the below arguments to also check API docs. +npm run check:internal-links -- --current-apis --dev-apis --historical-apis --qiskit-release-notes # However, `--historical-apis` currently has failing versions, so you may # want to add `--skip-broken-historical`. -npm run check:links -- --historical-apis --skip-broken-historical +npm run check:internal-links -- --historical-apis --skip-broken-historical # Or, run all the checks. Although this only checks non-API docs. npm run check ``` +To check external links: + +```bash +# Specify the files you want after `--` +npm run check:external-links -- docs/run/index.md docs/run/circuit-execution.mdx + +# You can also use globs +npm run check:external-links -- 'docs/run/*' '!docs/run/index.mdx' +``` + ## Check file metadata Every file needs to have a `title` and `description`. The `lint` job in CI will fail with instructions for any bad file. @@ -260,9 +268,9 @@ It's possible to write broken pages that crash when loaded. This is usually due To check that all the non-API docs render: 1. Start up the local preview with `./start` by following the instructions at [Preview the docs locally](#preview-the-docs-locally) -2. In a new tab, `npm run check-pages-render -- --non-api` +2. In a new tab, `npm run check:pages-render -- --non-api` -You can also check that API docs and translations render by using any of these arguments: `npm run check-pages-render -- --non-api --qiskit-release-notes --current-apis --dev-apis --historical-apis --translations`. Warning that this is exponentially slower. +You can also check that API docs and translations render by using any of these arguments: `npm run check:pages-render -- --non-api --qiskit-release-notes --current-apis --dev-apis --historical-apis --translations`. Warning that this is exponentially slower. CI will check on every PR that any changed files render correctly. We also run a weekly cron job to check that every page renders correctly. diff --git a/package.json b/package.json index ad66f94f4cd..63836c62883 100644 --- a/package.json +++ b/package.json @@ -5,12 +5,13 @@ "author": "Qiskit Development Team", "license": "Apache-2.0", "scripts": { - "check": "npm run check:metadata && npm run check:spelling && npm run check:links && npm run check:fmt", + "check": "npm run check:metadata && npm run check:spelling && npm run check:internal-links && npm run check:fmt", "check:metadata": "node -r esbuild-register scripts/commands/checkMetadata.ts", - "check:links": "node -r esbuild-register scripts/commands/checkLinks.ts", "check:spelling": "cspell --relative --no-progress docs/**/*.md* docs/api/**/*.md* --config cspell/cSpell.json", "check:fmt": "prettier --check .", - "check-pages-render": "node -r esbuild-register scripts/commands/checkPagesRender.ts", + "check:internal-links": "node -r esbuild-register scripts/commands/checkInternalLinks.ts", + "check:external-links": "node -r esbuild-register scripts/commands/checkExternalLinks.ts", + "check:pages-render": "node -r esbuild-register scripts/commands/checkPagesRender.ts", "fmt": "prettier --write .", "test": "jest", "typecheck": "tsc", diff --git a/scripts/commands/checkExternalLinks.ts b/scripts/commands/checkExternalLinks.ts new file mode 100644 index 00000000000..24aeae4b3a4 --- /dev/null +++ b/scripts/commands/checkExternalLinks.ts @@ -0,0 +1,78 @@ +// This code is a Qiskit project. +// +// (C) Copyright IBM 2024. +// +// This code is licensed under the Apache License, Version 2.0. You may +// obtain a copy of this license in the LICENSE file in the root directory +// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +// +// Any modifications or derivative works of this code must retain this +// copyright notice, and modified files need to carry a notice indicating +// that they have been altered from the originals. + +import { globby } from "globby"; +import yargs from "yargs/yargs"; +import { hideBin } from "yargs/helpers"; + +import { ExternalLink } from "../lib/links/ExternalLink"; +import { parseFile } from "../lib/links/extractLinks"; +import { addLinksToMap } from "../lib/links/FileBatch"; + +interface Arguments { + [x: string]: unknown; + globs?: string[]; +} + +const readArgs = (): Arguments => { + return yargs(hideBin(process.argv)) + .version(false) + .command("$0 [globs..]", "") + .parseSync(); +}; + +async function main() { + const args = readArgs(); + const filePaths = await globby(args.globs || []); + + const links = await getLinks(filePaths); + + // We use a for loop to reduce the risk of rate limiting. + let allGood = true; + let numLinksChecked = 1; + for (const link of links) { + const result = await link.check(); + + // This script can be slow, so log progress every 20 links. + if (numLinksChecked % 20 == 0) { + console.log(`Checked ${numLinksChecked} / ${links.length} links`); + } + numLinksChecked++; + + if (result === undefined) continue; + console.error(result); + allGood = false; + } + + if (!allGood) { + console.error( + `\n\nšŸ’” Some external links appear broken, although it's possible they are flakes. Checked ${links.length} links.`, + ); + process.exit(1); + } + console.log( + `\n\nāœ… No external links are broken. Checked ${links.length} links.`, + ); +} + +async function getLinks(filePaths: string[]): Promise { + const linksToOriginFiles = new Map(); + for (const filePath of filePaths) { + const parsed = await parseFile(filePath); + addLinksToMap(filePath, parsed.externalLinks, linksToOriginFiles); + } + return Array.from(linksToOriginFiles.entries()).map( + ([link, originFiles]) => new ExternalLink(link, originFiles), + ); +} + +main().then(() => process.exit()); diff --git a/scripts/commands/checkLinks.ts b/scripts/commands/checkInternalLinks.ts similarity index 96% rename from scripts/commands/checkLinks.ts rename to scripts/commands/checkInternalLinks.ts index 649dfcddbaf..6e49659edbe 100644 --- a/scripts/commands/checkLinks.ts +++ b/scripts/commands/checkInternalLinks.ts @@ -29,7 +29,6 @@ const SYNTHETIC_FILES: string[] = [ interface Arguments { [x: string]: unknown; - external: boolean; currentApis: boolean; devApis: boolean; historicalApis: boolean; @@ -40,12 +39,6 @@ interface Arguments { const readArgs = (): Arguments => { return yargs(hideBin(process.argv)) .version(false) - .option("external", { - type: "boolean", - default: false, - description: - "Should external links be checked? This slows down the script, but is useful to check.", - }) .option("current-apis", { type: "boolean", default: false, @@ -91,7 +84,7 @@ async function main() { let allGood = true; for (const fileBatch of fileBatches) { - const allValidLinks = await fileBatch.check(args.external, otherFiles); + const allValidLinks = await fileBatch.checkInternalLinks(otherFiles); if (!allValidLinks) { allGood = false; } diff --git a/scripts/lib/links/FileBatch.ts b/scripts/lib/links/FileBatch.ts index 557d522d512..969d5e601bd 100644 --- a/scripts/lib/links/FileBatch.ts +++ b/scripts/lib/links/FileBatch.ts @@ -13,7 +13,6 @@ import { globby } from "globby"; import { InternalLink, File } from "./InternalLink"; -import { ExternalLink } from "./ExternalLink"; import { ALWAYS_IGNORED_URLS, FILES_TO_IGNORES, @@ -59,75 +58,50 @@ export class FileBatch { /** * Load and process the file batch. * - * Returns a triplet: + * Returns a pair: * 1. A list of `File` objects with their anchors. These represent * the universe of valid internal links for this batch, other * than any additional we may add at check-time, e.g. images. * 2. A list of InternalLink objects to validate. - * 3. A list of ExternalLink objects to validate. */ - async load( - loadExternalLinks: boolean, - ): Promise<[File[], InternalLink[], ExternalLink[]]> { + async load(): Promise<[File[], InternalLink[]]> { const files: File[] = []; for (let filePath of this.toLoad) { const parsed = await parseFile(filePath); files.push(new File(filePath, parsed.anchors)); } - const internalLinksToOriginFiles = new Map(); - const externalLinksToOriginFiles = new Map(); + const linksToOriginFiles = new Map(); for (const filePath of this.toCheck) { const parsed = await parseFile(filePath); files.push(new File(filePath, parsed.anchors)); - addLinksToMap(filePath, parsed.internalLinks, internalLinksToOriginFiles); - if (loadExternalLinks) { - addLinksToMap( - filePath, - parsed.externalLinks, - externalLinksToOriginFiles, - ); - } + addLinksToMap(filePath, parsed.internalLinks, linksToOriginFiles); } - const internalLinks = Array.from(internalLinksToOriginFiles.entries()).map( + const links = Array.from(linksToOriginFiles.entries()).map( ([link, originFiles]) => new InternalLink(link, originFiles), ); - const externalLinks = Array.from(externalLinksToOriginFiles.entries()).map( - ([link, originFiles]) => new ExternalLink(link, originFiles), - ); - return [files, internalLinks, externalLinks]; + return [files, links]; } /** - * Check that all links in this file batch are valid. + * Check that all internal links in this file batch are valid. * * Logs the results to the console and returns `true` if there were no issues. */ - async check( - checkExternalLinks: boolean, - otherFiles: File[], - ): Promise { - console.log(`\n\nChecking links for ${this.description}`); + async checkInternalLinks(otherFiles: File[]): Promise { + console.log(`\n\nChecking internal links for ${this.description}`); - const [docsFiles, internalLinkList, externalLinkList] = - await this.load(checkExternalLinks); + const [docsFiles, links] = await this.load(); const existingFiles = docsFiles.concat(otherFiles); - const results = internalLinkList.map((link) => link.check(existingFiles)); - - // For loop reduces the risk of rate-limiting. - for (let link of externalLinkList) { - results.push(await link.check()); - } - let allGood = true; - results - .filter((res) => res !== undefined) - .forEach((linkError) => { - console.error(linkError); - allGood = false; - }); + links.forEach((link) => { + const result = link.check(existingFiles); + if (result === undefined) return; + console.error(result); + allGood = false; + }); return allGood; } }