Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement link checker #1207

Merged
merged 13 commits into from
Oct 3, 2023
45 changes: 45 additions & 0 deletions .github/workflows/broken-links-check.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Workflow that runs the repository's link checker (`yarn lint:links`) and
# opens a GitHub issue listing the reported URLs when that check fails.
name: check-links

# Triggered manually (workflow_dispatch) or by the cron schedule below.
on:
workflow_dispatch:
schedule:
# Minute 0, hour 0, day-of-week 1: once a week on Monday at 00:00 (GitHub evaluates schedules in UTC).
- cron: '0 0 * * 1'

jobs:
check-links:
runs-on: ubuntu-latest
steps:
# Check out the repository and set up Node 16 with the yarn cache enabled.
- uses: actions/checkout@v3
- uses: actions/setup-node@v3
with:
node-version: 16
cache: 'yarn'
# Corepack provides the Yarn version the project expects.
- name: Allow modern Yarn
run: |
corepack enable
- name: Install dependencies
run: |
yarn
- name: Install cli
run: cd cli && yarn && yarn build && cd ..
# The step id is referenced by the `if:` condition of the "Create issue" step.
- name: Check Links
id: check-links
# Run link checker and save outputs to files. https://stackoverflow.com/a/692407
run: |
yarn lint:links > >(tee -a stdout.txt) 2> >(tee -a stderr.txt >&2)
- name: Create issue
uses: actions/[email protected]
# always() lets this step run although an earlier step failed; the outcome
# comparison restricts it to runs where the link check itself failed.
# The script parses stderr.txt as a JSON array of objects with a `url` field.
if: always() && steps.check-links.outcome == 'failure'
with:
script: |
const fs = require('fs');
const input = fs.readFileSync('stderr.txt');
const data = JSON.parse(input);
const urls = data.map(item => item.url);
const output = urls.map(url => `- [ ] ${url}`).join('\n');
github.rest.issues.create({
owner: context.repo.owner,
repo: context.repo.repo,
title: 'Fix broken links',
body: `The following links are possibly broken:\n${output}`,
});
2 changes: 2 additions & 0 deletions cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
"ink-spinner": "^4.0.3",
"ink-text-input": "^4.0.3",
"isomorphic-git": "^1.17.2",
"link-check": "^5.2.0",
"node-html-parser": "^6.1.10",
"prettier": "2.8.8",
"remark": "^14.0.2",
"remark-lint-no-dead-urls": "^1.1.0",
Expand Down
122 changes: 116 additions & 6 deletions cli/src/commands/check.tsx
Original file line number Diff line number Diff line change
@@ -1,12 +1,33 @@
import { Command, Option } from 'clipanion';
import path from 'path';
import visit from 'unist-util-visit';
import { Node } from 'unist';
import checkUrl from 'link-check';
import { parse } from 'node-html-parser';

// Resolved filesystem path of the Docusaurus build config module.
// NOTE(review): its consumer is not visible in this part of the file — confirm where it is used.
const buildConfig = require.resolve('../docusaurus/config/build.js');
// Names of the Docusaurus plugins this command supports.
// NOTE(review): presumably used to pick which plugins' content paths get scanned;
// the code reading this list is outside the visible hunks — confirm.
const SUPPORTED_PLUGINS = [
'@docusaurus/plugin-content-docs',
'@docusaurus/plugin-content-pages',
'@iota-wiki/plugin-docs',
];
// URL protocols eligible for a liveness check. A URL with any other protocol is
// not checked and is recorded with the message 'ignored'.
const SUPPORTED_PROTOCOLS = ['http:', 'https:'];

/**
 * A syntax-tree node that carries a target URL.
 * `isLinkNode` narrows a node to this shape when its `type` is `'link'`.
 */
interface LinkNode extends Node {
// The link target exactly as written in the document.
url: string;
}

/**
 * A syntax-tree node holding raw markup.
 * `isJsxOrHtmlNode` narrows a node to this shape when its `type` is `'jsx'` or `'html'`.
 */
interface JsxOrHtmlNode extends Node {
// The raw source text of the node; it is handed to the HTML parser to look for anchors.
value: string;
}

/**
 * Type guard for link nodes.
 *
 * @param node - Any syntax-tree node.
 * @returns `true` when the node's `type` is `'link'`, narrowing it to `LinkNode`.
 */
function isLinkNode(node: Node): node is LinkNode {
  const { type } = node;
  return type === 'link';
}

/**
 * Type guard for nodes that hold raw JSX or HTML markup.
 *
 * @param node - Any syntax-tree node.
 * @returns `true` when the node's `type` is `'jsx'` or `'html'`, narrowing it
 *   to `JsxOrHtmlNode`.
 */
function isJsxOrHtmlNode(node: Node): node is JsxOrHtmlNode {
  switch (node.type) {
    case 'jsx':
    case 'html':
      return true;
    default:
      return false;
  }
}

export class Check extends Command {
static paths = [[`check`]];
Expand All @@ -20,6 +41,7 @@ export class Check extends Command {
});

async execute() {
console.time('check');
const { engine } = await import('unified-engine');
const { remark } = await import('remark');

Expand All @@ -43,21 +65,109 @@ export class Check extends Command {
return pluginPaths;
}, []);

return await new Promise<number>((resolve, reject) =>
const urls = [];

// A Remark plugin that visits all link-like elements, extracts the URLs,
// and adds them to the `urls` array.
//
// Two kinds of nodes contribute URLs:
//  - Markdown link nodes, via their `url` property.
//  - Raw JSX/HTML nodes, via the `href` of every `<a>` element found in
//    their markup.
//
// Returns the transformer used by the processor; the tree itself is passed
// through unchanged.
function RemarkLinkVisitor() {
  return async (tree) => {
    visit(tree, (node) => {
      if (isLinkNode(node)) urls.push(node.url);
      if (isJsxOrHtmlNode(node)) {
        // `parse` returns a synthetic root element that wraps the parsed
        // markup (its own tag name is null, and real elements report their
        // tag name in upper case), so comparing the root's `tagName` to 'a'
        // never matches. Query the anchors among the descendants instead.
        for (const anchor of parse(node.value).querySelectorAll('a')) {
          const href = anchor.getAttribute('href');
          // Anchors without an href (e.g. named anchors) carry no URL.
          if (href !== undefined) urls.push(href);
        }
      }
    });
    return tree;
  };
}

// The Unified engine that runs the RemarkLinkVisitor plugin on all
// reachable markdown files.
await new Promise<number>((resolve, reject) =>
engine(
{
processor: remark(),
files: pluginPaths,
files: [pluginPaths],
extensions: ['md', 'mdx'],
plugins: ['remark-lint-no-dead-urls'],
color: true,
quiet: true,
frail: true,
plugins: [RemarkLinkVisitor],
silent: true,
},
(error, status) => {
error ? reject(error) : resolve(status);
},
),
);
const results = [];

// Validate every collected URL and bucket the remote ones by hostname.
//  - Strings that `URL` cannot parse are recorded as errors with the thrown
//    error as message.
//  - URLs on `localhost` or with an unsupported protocol are recorded as
//    errors with the message 'ignored'.
//  - Everything else is stored (normalised through `href`) in the set that
//    belongs to its hostname, which also de-duplicates it.
const urlsByHostname = new Map<string, Set<string>>();

for (const url of urls) {
  let validUrl: URL;

  try {
    validUrl = new URL(url);
  } catch (error) {
    results.push({ status: 'error', url, message: error });
    continue;
  }

  const isRemote =
    validUrl.hostname !== 'localhost' &&
    SUPPORTED_PROTOCOLS.includes(validUrl.protocol);

  if (!isRemote) {
    results.push({ status: 'error', url, message: 'ignored' });
    continue;
  }

  const hostname = validUrl.hostname;
  if (!urlsByHostname.has(hostname)) {
    urlsByHostname.set(hostname, new Set([validUrl.href]));
  } else {
    urlsByHostname.get(hostname).add(validUrl.href);
  }
}

// Probe every remote link for liveness. Hostnames are processed concurrently,
// but the links of a single hostname are checked one after another so that
// no host gets hammered and starts rate limiting us.
// Each probe appends one entry to `results`: either `error` with the error as
// message, or the status reported by the checker (`alive` / `dead`) with the
// HTTP status code as message.
const probe = (url: string) =>
  new Promise<void>((done) => {
    checkUrl(url, (error, result) => {
      if (error) {
        results.push({ status: 'error', url, message: error });
      } else {
        results.push({
          status: result.status,
          url,
          message: result.statusCode,
        });
      }
      // A failed probe is recorded, never rejected, so the queue keeps going.
      done();
    });
  });

const hostnameQueues = Array.from(urlsByHostname.values()).map(
  async (hostnameUrls) => {
    for (const url of Array.from(hostnameUrls)) {
      await probe(url);
    }
  },
);

await Promise.all(hostnameQueues);

// Split the collected results by status.
const withStatus = (wanted: string) =>
  results.filter(({ status }) => status === wanted);

const errored = withStatus('error');
const alive = withStatus('alive');
const dead = withStatus('dead');

// Dead links are written to stderr as pretty-printed JSON (4-space indent);
// the summary counts and the timer go to stdout.
console.error(`${JSON.stringify(dead, null, 4)}`);

console.log(
  `alive: ${alive.length}, dead: ${dead.length}, error: ${errored.length}`,
);

console.timeEnd('check');

// Exit status: 1 when at least one dead link was found, otherwise 0.
if (dead.length > 0) return 1;
return 0;
}
}
67 changes: 58 additions & 9 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2715,6 +2715,8 @@ __metadata:
ink-spinner: ^4.0.3
ink-text-input: ^4.0.3
isomorphic-git: ^1.17.2
link-check: ^5.2.0
node-html-parser: ^6.1.10
nodemon: ^2.0.16
prettier: 2.8.8
raw-loader: ^4.0.2
Expand Down Expand Up @@ -10067,7 +10069,7 @@ __metadata:
languageName: node
linkType: hard

"he@npm:^1.2.0":
"he@npm:1.2.0, he@npm:^1.2.0":
version: 1.2.0
resolution: "he@npm:1.2.0"
bin:
Expand Down Expand Up @@ -10732,6 +10734,13 @@ __metadata:
languageName: node
linkType: hard

"is-absolute-url@npm:^4.0.1":
version: 4.0.1
resolution: "is-absolute-url@npm:4.0.1"
checksum: de172a718439982a54477fdae55f21be69ec0e6a4b205db5484975d2f4ee749851fd46c28f3790dfc51a274c2ed1d0f8457b6d1fff02ab829069fd9cc761e48c
languageName: node
linkType: hard

"is-alphabetical@npm:1.0.4, is-alphabetical@npm:^1.0.0":
version: 1.0.4
resolution: "is-alphabetical@npm:1.0.4"
Expand Down Expand Up @@ -11079,6 +11088,15 @@ __metadata:
languageName: node
linkType: hard

"is-relative-url@npm:^4.0.0":
version: 4.0.0
resolution: "is-relative-url@npm:4.0.0"
dependencies:
is-absolute-url: ^4.0.1
checksum: 9556fc1d7d88b147428f237408a02ecab2a9bf45e2b93cf17069ada5207ff4050dcd5c3a1cf9bbc8c9133a858de9228a0c6c9c45a6a84eb0b98e210cacbe99ea
languageName: node
linkType: hard

"is-root@npm:^2.1.0":
version: 2.1.0
resolution: "is-root@npm:2.1.0"
Expand Down Expand Up @@ -11207,6 +11225,15 @@ __metadata:
languageName: node
linkType: hard

"isemail@npm:^3.2.0":
version: 3.2.0
resolution: "isemail@npm:3.2.0"
dependencies:
punycode: 2.x.x
checksum: 77adfbe8d6b3f9970c37516e008fd1c2e33be186f4cd09b31daf37a9fa8f82adc0dd8ce4bd12818f0c0e63018f9ec0c22490793d704ed635770c98a9d48b21c4
languageName: node
linkType: hard

"isexe@npm:^2.0.0":
version: 2.0.0
resolution: "isexe@npm:2.0.0"
Expand Down Expand Up @@ -11634,6 +11661,18 @@ __metadata:
languageName: node
linkType: hard

"link-check@npm:^5.2.0":
version: 5.2.0
resolution: "link-check@npm:5.2.0"
dependencies:
is-relative-url: ^4.0.0
isemail: ^3.2.0
ms: ^2.1.3
needle: ^3.1.0
checksum: 037488e824b830975159bf2ae1f69c5b92eb11d1c2518096c4a1d7e3f67b8aca20ff2a5bcf17cbf22ebeebe302141e08f54c550c1c04ede6f802d936be04fc94
languageName: node
linkType: hard

"liquid-json@npm:0.3.1":
version: 0.3.1
resolution: "liquid-json@npm:0.3.1"
Expand Down Expand Up @@ -12781,7 +12820,7 @@ __metadata:
languageName: node
linkType: hard

"ms@npm:2.1.3, ms@npm:^2.0.0, ms@npm:^2.1.1":
"ms@npm:2.1.3, ms@npm:^2.0.0, ms@npm:^2.1.1, ms@npm:^2.1.3":
version: 2.1.3
resolution: "ms@npm:2.1.3"
checksum: aa92de608021b242401676e35cfa5aa42dd70cbdc082b916da7fb925c542173e36bce97ea3e804923fe92c0ad991434e4a38327e15a1b5b5f945d66df615ae6d
Expand Down Expand Up @@ -12947,6 +12986,16 @@ __metadata:
languageName: node
linkType: hard

"node-html-parser@npm:^6.1.10":
version: 6.1.10
resolution: "node-html-parser@npm:6.1.10"
dependencies:
css-select: ^5.1.0
he: 1.2.0
checksum: 927f6a38b3b1cbc042bce609e24fb594d3b1e0f1067ffb416a925fa5a699e907be31980f349e094d55bab706dc16a71958b08f8dcdab62faf7b12013f29442bc
languageName: node
linkType: hard

"node-polyfill-webpack-plugin@npm:^2.0.1":
version: 2.0.1
resolution: "node-polyfill-webpack-plugin@npm:2.0.1"
Expand Down Expand Up @@ -14569,20 +14618,20 @@ plugin-image-zoom@flexanalytics/plugin-image-zoom:
languageName: node
linkType: hard

"punycode@npm:2.x.x, punycode@npm:^2.1.0, punycode@npm:^2.1.1":
version: 2.3.0
resolution: "punycode@npm:2.3.0"
checksum: 39f760e09a2a3bbfe8f5287cf733ecdad69d6af2fe6f97ca95f24b8921858b91e9ea3c9eeec6e08cede96181b3bb33f95c6ffd8c77e63986508aa2e8159fa200
languageName: node
linkType: hard

"punycode@npm:^1.3.2, punycode@npm:^1.4.1":
version: 1.4.1
resolution: "punycode@npm:1.4.1"
checksum: fa6e698cb53db45e4628559e557ddaf554103d2a96a1d62892c8f4032cd3bc8871796cae9eabc1bc700e2b6677611521ce5bb1d9a27700086039965d0cf34518
languageName: node
linkType: hard

"punycode@npm:^2.1.0, punycode@npm:^2.1.1":
version: 2.3.0
resolution: "punycode@npm:2.3.0"
checksum: 39f760e09a2a3bbfe8f5287cf733ecdad69d6af2fe6f97ca95f24b8921858b91e9ea3c9eeec6e08cede96181b3bb33f95c6ffd8c77e63986508aa2e8159fa200
languageName: node
linkType: hard

"pupa@npm:^2.1.1":
version: 2.1.1
resolution: "pupa@npm:2.1.1"
Expand Down