From 48ef575a6d2b4ffe44b896b09b21b86337b19167 Mon Sep 17 00:00:00 2001
From: Eric Arellano <14852634+Eric-Arellano@users.noreply.github.com>
Date: Fri, 1 Nov 2024 14:17:21 -0400
Subject: [PATCH] Link checker parses headers inside component
---
scripts/js/lib/links/extractLinks.test.ts | 21 ++++++++++++++++++
scripts/js/lib/links/extractLinks.ts | 27 ++++++++++++++++++-----
2 files changed, 43 insertions(+), 5 deletions(-)
diff --git a/scripts/js/lib/links/extractLinks.test.ts b/scripts/js/lib/links/extractLinks.test.ts
index 44626d066a6..8da3804329e 100644
--- a/scripts/js/lib/links/extractLinks.test.ts
+++ b/scripts/js/lib/links/extractLinks.test.ts
@@ -29,6 +29,16 @@ test("parseAnchors()", () => {
## \`code-header\`
+ ## Header.with periods-and wild! punctuation?? and numbers 1234 8 (parentheses)
+
+ ## header_using\_underscores
+
+ ## UpperCase Should Be lowercase
+
+ ## repeated
+ ## repeated
+ ## repeated
+
Convert to dictionary.
@@ -36,15 +46,26 @@ test("parseAnchors()", () => {
\`Dict\`
+
+
+ ### Header inside a component
+
`);
expect(result).toEqual(
new Set([
"#my-top-level-heading",
"#header-2",
"#code-header",
+ "#headerwith-periods-and-wild-punctuation-and-numbers-1234-8-parentheses",
+ "#header_using_underscores",
+ "#uppercase-should-be-lowercase",
"#this-is-a-hardcoded-anchor",
"#another_span",
"#mdx.component.testId",
+ "#header-inside-a-component",
+ "#repeated",
+ "#repeated-1",
+ "#repeated-2",
]),
);
});
diff --git a/scripts/js/lib/links/extractLinks.ts b/scripts/js/lib/links/extractLinks.ts
index 822a2d21728..5f4a7d8a3ea 100644
--- a/scripts/js/lib/links/extractLinks.ts
+++ b/scripts/js/lib/links/extractLinks.ts
@@ -35,11 +35,44 @@ export type ParsedFile = {
 };
 
+/**
+ * Collects the anchors that links may target in a Markdown/MDX document.
+ *
+ * Headings are slugified (lowercased, spaces to dashes, some punctuation
+ * dropped, repeats suffixed with `-1`, `-2`, ...) and every HTML `id="..."`
+ * attribute value is included verbatim. Each anchor is returned with a
+ * leading `#`.
+ */
 export function parseAnchors(markdown: string): Set<string> {
-  // Anchors generated from markdown titles.
-  const mdAnchors = markdownLinkExtractor(markdown).anchors;
-  // Anchors from HTML id tags.
-  const idAnchors = markdown.match(/(?<=id=")(.+?)(?=")/gm) || [];
-  return new Set([...mdAnchors, ...idAnchors.map((id) => `#${id}`)]);
+  // Scan line by line instead of delegating to markdownLinkExtractor so that
+  // headings indented inside components are also picked up.
+  const lines = markdown.split("\n");
+  const anchors = new Set<string>();
+  for (const line of lines) {
+    // ATX heading: optional indentation, 1-6 `#`, whitespace, then the title.
+    const heading = line.match(/^\s*#{1,6}\s+(.+?)\s*$/);
+    if (heading) {
+      // Slugify: lowercase, spaces become dashes, listed punctuation is dropped.
+      const normalized = heading[1]
+        .toLowerCase()
+        .trim()
+        .replaceAll(" ", "-")
+        .replaceAll(/[\.,;!?`\\\(\)]/g, "");
+      // Repeated titles get `-1`, `-2`, ... suffixes so every anchor is unique.
+      let deduplicated = normalized;
+      let i = 1;
+      while (anchors.has(`#${deduplicated}`)) {
+        deduplicated = `${normalized}-${i}`;
+        i += 1;
+      }
+      anchors.add(`#${deduplicated}`);
+    }
+    // Anchors from HTML id attributes. The `g` flag collects every id on the
+    // line, not only the first, matching the previous whole-document scan.
+    for (const id of line.match(/(?<=id=")(.+?)(?=")/g) ?? []) {
+      anchors.add(`#${id}`);
+    }
+  }
+  return anchors;
 }
 
 export async function parseLinks(