/**
 * Collects every anchor (`#fragment`) that a link may target inside one
 * markdown / MDX document.
 *
 * Two sources of anchors are recognised:
 *
 * 1. ATX headings (`#` through `######`). Leading whitespace is allowed so that
 *    headings nested inside an indented MDX component are still found. The
 *    heading text is lowercased, spaces become `-`, and the characters
 *    `. , ; ! ? \` \ ( )` are removed. When the same slug occurs again, `-1`,
 *    `-2`, ... is appended until the anchor is unique within the document.
 *    Lines inside fenced code blocks (``` or ~~~) are never treated as
 *    headings, so a `# comment` in a code sample does not create an anchor.
 * 2. Every `id="..."` attribute value, including several on the same line.
 *    These are collected on all lines, fenced or not.
 *
 * @example
 * parseAnchors("## UpperCase Should Be lowercase");
 * // Set { "#uppercase-should-be-lowercase" }
 *
 * @example
 * parseAnchors("## repeated\n## repeated\n## repeated");
 * // Set { "#repeated", "#repeated-1", "#repeated-2" }
 *
 * @param markdown - Raw markdown/MDX source of a single file.
 * @returns The set of anchors, each one prefixed with `#`.
 */
export function parseAnchors(markdown: string): Set<string> {
  // Patterns are built once per call rather than once per line.
  const headingPattern = /^\s*#{1,6}\s+(.+?)\s*$/;
  const fencePattern = /^\s*(`{3,}|~{3,})(.*)$/;
  const idPattern = /(?<=id=")(.+?)(?=")/g;
  const strippedPunctuation = /[\.,;!?`\\\(\)]/g;

  const anchors = new Set<string>();
  // Marker (e.g. "```" or "~~~~") of the fenced code block we are currently
  // inside, or undefined when we are in regular markdown.
  let openFence: string | undefined;

  for (const line of markdown.split("\n")) {
    const fence = line.match(fencePattern);
    const marker = fence?.[1];
    let isFenceLine = false;

    if (marker !== undefined) {
      const rest = fence?.[2] ?? "";
      if (openFence === undefined) {
        // A backtick fence may not contain backticks in its info string;
        // otherwise the line is inline code such as "```x``` text".
        if (!(marker[0] === "`" && rest.includes("`"))) {
          openFence = marker;
          isFenceLine = true;
        }
      } else if (
        marker[0] === openFence[0] &&
        marker.length >= openFence.length &&
        rest.trim() === ""
      ) {
        // A closing fence uses the same character, is at least as long as the
        // opening one, and carries no info string.
        openFence = undefined;
        isFenceLine = true;
      }
    }

    if (openFence === undefined && !isFenceLine) {
      const headingText = line.match(headingPattern)?.[1];
      if (headingText !== undefined) {
        const normalized = headingText
          .toLowerCase()
          .trim()
          .replace(/ /g, "-")
          .replace(strippedPunctuation, "");
        // Repeated headings get -1, -2, ... appended until the slug is unused.
        let deduplicated = normalized;
        let suffix = 1;
        while (anchors.has(`#${deduplicated}`)) {
          deduplicated = `${normalized}-${suffix}`;
          suffix += 1;
        }
        anchors.add(`#${deduplicated}`);
      }
    }

    // Hardcoded HTML/MDX ids. `matchAll` picks up every id on the line, not
    // only the first one.
    for (const match of line.matchAll(idPattern)) {
      const id = match[1];
      if (id !== undefined) anchors.add(`#${id}`);
    }
  }

  return anchors;
}