Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Determine internal vs external links at parse time #884

Merged
merged 3 commits into from
Feb 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions scripts/commands/checkLinks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,10 @@ async function main() {

const fileBatches = await determineFileBatches(args);
const otherFiles = [
...(await globby("public/**/*")).map((fp) => new File(fp, [])),
...SYNTHETIC_FILES.map((fp) => new File(fp, [], true)),
...(await globby("public/{images,videos}/**/*")).map(
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would include the objects.inv files, which was wrong.

(fp) => new File(fp, new Set()),
),
...SYNTHETIC_FILES.map((fp) => new File(fp, new Set(), true)),
];

let allGood = true;
Expand Down
1 change: 1 addition & 0 deletions scripts/lib/links/ExternalLink.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ export class ExternalLink {
try {
const response = await fetch(this.value, {
headers: { "User-Agent": "qiskit-documentation-broken-links-finder" },
method: "HEAD",
});
if (response.status >= 300) {
error = `Could not find link '${this.value}'`;
Expand Down
12 changes: 10 additions & 2 deletions scripts/lib/links/FileBatch.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,23 @@ import { addLinksToMap } from "./FileBatch";
test("addLinksToMap()", () => {
const linksToMap = new Map();

addLinksToMap("file1.md", ["https://ibm.com", "./relative"], linksToMap);
addLinksToMap(
"file1.md",
new Set(["https://ibm.com", "./relative"]),
linksToMap,
);
expect(linksToMap).toEqual(
new Map([
["https://ibm.com", ["file1.md"]],
["./relative", ["file1.md"]],
]),
);

addLinksToMap("file2.md", ["./relative", "/images/my_image.png"], linksToMap);
addLinksToMap(
"file2.md",
new Set(["./relative", "/images/my_image.png"]),
linksToMap,
);
expect(linksToMap).toEqual(
new Map([
["https://ibm.com", ["file1.md"]],
Expand Down
69 changes: 36 additions & 33 deletions scripts/lib/links/FileBatch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,43 +66,36 @@ export class FileBatch {
* 2. A list of InternalLink objects to validate.
* 3. A list of ExternalLink objects to validate.
*/
async load(): Promise<[File[], InternalLink[], ExternalLink[]]> {
async load(
loadExternalLinks: boolean,
): Promise<[File[], InternalLink[], ExternalLink[]]> {
const files: File[] = [];
for (let filePath of this.toLoad) {
const parsed = await parseFile(filePath);
files.push(new File(filePath, parsed.anchors));
}

const linksToOriginFiles = new Map<string, string[]>();
const internalLinksToOriginFiles = new Map<string, string[]>();
const externalLinksToOriginFiles = new Map<string, string[]>();
for (const filePath of this.toCheck) {
const parsed = await parseFile(filePath);
files.push(new File(filePath, parsed.anchors));
if (!IGNORED_FILES.has(filePath)) {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All three ignores are moved into addLinksToMap.

addLinksToMap(filePath, parsed.rawLinks, linksToOriginFiles);
}
}

const internalLinks: InternalLink[] = [];
const externalLinks: ExternalLink[] = [];
for (let [linkPath, originFiles] of linksToOriginFiles) {
if (ALWAYS_IGNORED_URLS.has(linkPath)) {
continue;
}
originFiles = originFiles.filter(
(originFile) =>
FILES_TO_IGNORES[originFile] == null ||
!FILES_TO_IGNORES[originFile].includes(linkPath),
);

if (originFiles.length > 0) {
if (linkPath.startsWith("http")) {
externalLinks.push(new ExternalLink(linkPath, originFiles));
} else {
internalLinks.push(new InternalLink(linkPath, originFiles));
}
addLinksToMap(filePath, parsed.internalLinks, internalLinksToOriginFiles);
if (loadExternalLinks) {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't waste our time storing external links if not necessary. That should slightly decrease memory usage and improve performance.

addLinksToMap(
filePath,
parsed.externalLinks,
externalLinksToOriginFiles,
);
}
}

const internalLinks = Array.from(internalLinksToOriginFiles.entries()).map(
([link, originFiles]) => new InternalLink(link, originFiles),
);
const externalLinks = Array.from(externalLinksToOriginFiles.entries()).map(
([link, originFiles]) => new ExternalLink(link, originFiles),
);
return [files, internalLinks, externalLinks];
}

Expand All @@ -111,19 +104,21 @@ export class FileBatch {
*
* Logs the results to the console and returns `true` if there were no issues.
*/
async check(externalLinks: boolean, otherFiles: File[]): Promise<boolean> {
async check(
checkExternalLinks: boolean,
otherFiles: File[],
): Promise<boolean> {
console.log(`\n\nChecking links for ${this.description}`);

const [docsFiles, internalLinkList, externalLinkList] = await this.load();
const [docsFiles, internalLinkList, externalLinkList] =
await this.load(checkExternalLinks);
const existingFiles = docsFiles.concat(otherFiles);

const results = internalLinkList.map((link) => link.check(existingFiles));

if (externalLinks) {
// For loop reduces the risk of rate-limiting.
for (let link of externalLinkList) {
results.push(await link.check());
}
// For loop reduces the risk of rate-limiting.
for (let link of externalLinkList) {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

externalLinkList will be empty if we're not checking external links.

results.push(await link.check());
}

let allGood = true;
Expand All @@ -139,10 +134,18 @@ export class FileBatch {

export function addLinksToMap(
filePath: string,
links: string[],
links: Set<string>,
linksToOriginFiles: Map<string, string[]>,
): void {
if (IGNORED_FILES.has(filePath)) return;
links.forEach((link) => {
if (
ALWAYS_IGNORED_URLS.has(link) ||
FILES_TO_IGNORES[filePath]?.includes(link)
) {
return;
}

const entry = linksToOriginFiles.get(link);
if (entry === undefined) {
linksToOriginFiles.set(link, [filePath]);
Expand Down
34 changes: 20 additions & 14 deletions scripts/lib/links/InternalLink.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ describe("Test the constructor of InternalLink", () => {
describe("Validate links", () => {
test("existing with absolute path", () => {
let testLink = new InternalLink("/testpath", ["/testorigin.mdx"]);
let testFile = new File("docs/testpath.mdx", []);
let testFile = new File("docs/testpath.mdx", new Set());
const results = testLink.check([testFile]);
expect(results).toBeUndefined();
});
Expand All @@ -47,7 +47,7 @@ describe("Validate links", () => {
let testLink = new InternalLink("/test-alternative-path", [
"/testorigin.mdx",
]);
let testFile = new File("docs/testpath.mdx", []);
let testFile = new File("docs/testpath.mdx", new Set());
const results = testLink.check([testFile]);
expect(results).toEqual(
"❌ Could not find link '/test-alternative-path'. Appears in:\n /testorigin.mdx",
Expand All @@ -58,7 +58,7 @@ describe("Validate links", () => {
let testLink = new InternalLink("../testpath", [
"docs/test/testorigin.mdx",
]);
let testFile = new File("docs/testpath.mdx", []);
let testFile = new File("docs/testpath.mdx", new Set());
const results = testLink.check([testFile]);
expect(results).toBeUndefined();
});
Expand All @@ -67,7 +67,7 @@ describe("Validate links", () => {
let testLink = new InternalLink("../testpath", [
"docs/test1/test2/testorigin.mdx",
]);
let testFile = new File("docs/testpath.mdx", []);
let testFile = new File("docs/testpath.mdx", new Set());
const results = testLink.check([testFile]);
expect(results).toEqual(
"❌ Could not find link '../testpath'. Appears in:\n docs/test1/test2/testorigin.mdx",
Expand All @@ -81,8 +81,8 @@ describe("Validate links", () => {
"docs/test/test3/testorigin.mdx",
"docs/test/test2/test4/testorigin.mdx",
]);
let testFile1 = new File("docs/testpath.mdx", []);
let testFile2 = new File("docs/test/test2/testpath.mdx", []);
let testFile1 = new File("docs/testpath.mdx", new Set());
let testFile2 = new File("docs/test/test2/testpath.mdx", new Set());
const results = testLink.check([testFile1, testFile2]);
expect(results).toBeUndefined();
});
Expand All @@ -94,8 +94,8 @@ describe("Validate links", () => {
"docs/test/test3/testorigin.mdx",
"docs/test/test2/test4/testorigin.mdx",
]);
let testFile1 = new File("docs/test/testpath.mdx", []);
let testFile2 = new File("docs/test2/test3/testpath.mdx", []);
let testFile1 = new File("docs/test/testpath.mdx", new Set());
let testFile2 = new File("docs/test2/test3/testpath.mdx", new Set());
const results = testLink.check([testFile1, testFile2]);
expect(results).toEqual(
"❌ Could not find link '/testpath'. Appears in:\n" +
Expand All @@ -115,8 +115,8 @@ describe("Validate links", () => {
"docs/test/test3/testorigin.mdx",
"docs/test/test2/test4/testorigin.mdx",
]);
let testFile1 = new File("docs/testpath.mdx", []);
let testFile2 = new File("docs/test/test2/testpath.mdx", []);
let testFile1 = new File("docs/testpath.mdx", new Set());
let testFile2 = new File("docs/test/test2/testpath.mdx", new Set());
const results = testLink.check([testFile1, testFile2]);
expect(results).toEqual(
"❌ Could not find link '../testpath'. Appears in:\n docs/test/test2/testorigin.mdx\n docs/test/test3/testorigin.mdx",
Expand All @@ -127,7 +127,7 @@ describe("Validate links", () => {
let testLink = new InternalLink("/testpath#test_anchor", [
"/testorigin.mdx",
]);
let testFile = new File("docs/testpath.mdx", ["#test_anchor"]);
let testFile = new File("docs/testpath.mdx", new Set(["#test_anchor"]));
const results = testLink.check([testFile]);
expect(results).toBeUndefined();
});
Expand All @@ -136,7 +136,10 @@ describe("Validate links", () => {
let testLink = new InternalLink("/testpath#test_anchor", [
"/testorigin.mdx",
]);
let testFile = new File("docs/testpath.mdx", ["#test_diff_anchor"]);
let testFile = new File(
"docs/testpath.mdx",
new Set(["#test_diff_anchor"]),
);
const results = testLink.check([testFile]);
expect(results).toEqual(
"❌ Could not find link '/testpath#test_anchor'. Appears in:\n /testorigin.mdx ❓ Did you mean '/testpath#test_diff_anchor'?",
Expand All @@ -147,7 +150,7 @@ describe("Validate links", () => {
let testLink = new InternalLink("../testpath#test_anchor", [
"docs/test/testorigin.mdx",
]);
let testFile = new File("docs/testpath.mdx", ["#test_anchor"]);
let testFile = new File("docs/testpath.mdx", new Set(["#test_anchor"]));
const results = testLink.check([testFile]);
expect(results).toBeUndefined();
});
Expand All @@ -156,7 +159,10 @@ describe("Validate links", () => {
let testLink = new InternalLink("../testpath#test-anchor", [
"docs/test/testorigin.mdx",
]);
let testFile = new File("docs/testpath.mdx", ["#test_diff_anchor"]);
let testFile = new File(
"docs/testpath.mdx",
new Set(["#test_diff_anchor"]),
);
const results = testLink.check([testFile]);
expect(results).toEqual(
"❌ Could not find link '../testpath#test-anchor'. Appears in:\n docs/test/testorigin.mdx ❓ Did you mean '/testpath#test_diff_anchor'?",
Expand Down
8 changes: 4 additions & 4 deletions scripts/lib/links/InternalLink.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ const CONTENT_FILE_EXTENSIONS = [".md", ".mdx", ".ipynb"];

export class File {
readonly path: string;
readonly anchors: string[];
readonly anchors: Set<string>;
readonly synthetic: boolean;

/**
* path: Path to the file
* anchors: Anchors available in the file
*/
constructor(path: string, anchors: string[], synthetic: boolean = false) {
constructor(path: string, anchors: Set<string>, synthetic: boolean = false) {
this.path = path;
this.anchors = anchors;
this.synthetic = synthetic;
Expand Down Expand Up @@ -96,7 +96,7 @@ export class InternalLink {
existingFile.path == filePath &&
(this.anchor == "" ||
existingFile.synthetic == true ||
existingFile.anchors.includes(this.anchor)),
existingFile.anchors.has(this.anchor)),
),
);
}
Expand All @@ -122,7 +122,7 @@ export class InternalLink {
if (score < minScoreLink) {
minScoreLink = score;
suggestionPath = file.path;
suggestionPathAnchors = file.anchors;
suggestionPathAnchors = Array.from(file.anchors);
}
});

Expand Down
28 changes: 14 additions & 14 deletions scripts/lib/links/extractLinks.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,15 @@ test("parseAnchors()", () => {

## \`code-header\`
`);
expect(result).toEqual([
"#my-top-level-heading",
"#header-2",
"#code-header",
"#this-is-a-hardcoded-anchor",
"#another_span",
]);
expect(result).toEqual(
new Set([
"#my-top-level-heading",
"#header-2",
"#code-header",
"#this-is-a-hardcoded-anchor",
"#another_span",
]),
);
});

test("parseLinks()", async () => {
Expand All @@ -81,11 +83,9 @@ test("parseLinks()", async () => {

<a href="./explicit-anchor">Explicit anchor</a>
`;
const result = await parseLinks(markdown);
expect(result).toEqual([
"https://ibm.com",
"./relative",
"/images/my_image.png",
"./explicit-anchor",
]);
const [internalLinks, externalLinks] = await parseLinks(markdown);
expect(internalLinks).toEqual(
new Set(["./relative", "/images/my_image.png", "./explicit-anchor"]),
);
expect(externalLinks).toEqual(new Set(["https://ibm.com"]));
});
Loading
Loading