From 411b8c30d906d0e13ef2aa1a9e47a29352932a2c Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Fri, 27 Oct 2023 18:55:29 +0200 Subject: [PATCH 01/19] :sparkles: add react-remark library, test it in SimpleMarkdownText From 94b27f2534f8e1661bbe08c419534a1827fd3c40 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Fri, 27 Oct 2023 22:44:17 +0200 Subject: [PATCH 02/19] :sparkles: replace our svg markdown parser with mdast parser --- package.json | 2 + .../components/src/GdocsUtils.ts | 142 --- .../MarkdownTextWrap.stories.tsx | 10 +- .../src/MarkdownTextWrap/MarkdownTextWrap.tsx | 367 ++++-- .../src/MarkdownTextWrap/parser.test.ts | 1037 ----------------- .../components/src/MarkdownTextWrap/parser.ts | 572 --------- .../@ourworldindata/components/src/index.ts | 11 - yarn.lock | 340 ++++++ 8 files changed, 645 insertions(+), 1836 deletions(-) delete mode 100644 packages/@ourworldindata/components/src/MarkdownTextWrap/parser.test.ts delete mode 100644 packages/@ourworldindata/components/src/MarkdownTextWrap/parser.ts diff --git a/package.json b/package.json index 13a836cc626..c5572c2d266 100644 --- a/package.json +++ b/package.json @@ -82,6 +82,7 @@ "@types/jsonwebtoken": "^9.0.0", "@types/lodash": "^4.14.185", "@types/md5": "^2.3.2", + "@types/mdast": "^4.0.2", "@types/minimist": "^1.2.2", "@types/mousetrap": "^1.6.9", "@types/mysql": "^2.15.21", @@ -154,6 +155,7 @@ "lodash": "^4.17.20", "mathjax-full": "^3.1.0", "md5": "^2.3.0", + "mdast-util-from-markdown": "^2.0.0", "minimist": "^1.2.6", "mobx": "^5.15.7", "mobx-formatters": "^1.0.2", diff --git a/packages/@ourworldindata/components/src/GdocsUtils.ts b/packages/@ourworldindata/components/src/GdocsUtils.ts index 4eeaff0e7d1..ac040b0a8f2 100644 --- a/packages/@ourworldindata/components/src/GdocsUtils.ts +++ b/packages/@ourworldindata/components/src/GdocsUtils.ts @@ -1,18 +1,11 @@ import { spansToUnformattedPlainText, gdocUrlRegex, - EnrichedBlockText, OwidGdocLinkJSON, Span, Url, } from "@ourworldindata/utils" 
import urlSlug from "url-slug" -import { - EveryMarkdownNode, - MarkdownRoot, - mdParser, -} from "./MarkdownTextWrap/parser.js" -import { P, match } from "ts-pattern" export function getLinkType(urlString: string): OwidGdocLinkJSON["linkType"] { const url = Url.fromURL(urlString) @@ -50,138 +43,3 @@ export function getUrlTarget(urlString: string): string { export function convertHeadingTextToId(headingText: Span[]): string { return urlSlug(spansToUnformattedPlainText(headingText)) } - -const convertMarkdownNodeToSpan = (node: EveryMarkdownNode): Span[] => { - return match(node) - .with( - { - type: "text", - }, - (n) => [ - { - spanType: "span-simple-text" as const, - text: n.value, - } as Span, - ] - ) - .with( - { - type: "textSegments", - }, - (n) => n.children.flatMap(convertMarkdownNodeToSpan) as Span[] - ) - .with( - { - type: "newline", - }, - () => [ - { - spanType: "span-simple-text" as const, - text: "\n", - } as Span, - ] - ) - .with( - { - type: "whitespace", - }, - () => [ - { - spanType: "span-simple-text" as const, - text: " ", - } as Span, - ] - ) - .with( - { - type: "detailOnDemand", - }, - (n) => [ - { - spanType: "span-dod" as const, - id: n.term, - children: n.children.flatMap(convertMarkdownNodeToSpan), - } as Span, - ] - ) - .with( - { - type: "markdownLink", - }, - (n) => [ - { - spanType: "span-link" as const, - url: n.href, - children: n.children.flatMap(convertMarkdownNodeToSpan), - } as Span, - ] - ) - .with( - { - type: "plainUrl", - }, - (n) => [ - { - spanType: "span-link" as const, - url: n.href, - children: [ - { - spanType: "span-simple-text" as const, - text: n.href, - }, - ], - } as Span, - ] - ) - .with( - { - type: P.union("italic", "plainItalic", "italicWithoutBold"), - }, - (n) => [ - { - spanType: "span-italic" as const, - children: n.children.flatMap(convertMarkdownNodeToSpan), - } as Span, - ] - ) - .with( - { - type: P.union("bold", "plainBold", "boldWithoutItalic"), - }, - (n) => [ - { - spanType: "span-bold" as const, 
- children: n.children.flatMap(convertMarkdownNodeToSpan), - } as Span, - ] - ) - .exhaustive() - //.otherwise(() => ({ spanType: "span-simple-text" as const, text: "" })) -} - -const convertMarkdownNodesToSpans = (nodes: MarkdownRoot): Span[] => - nodes.children.flatMap(convertMarkdownNodeToSpan) - -export const markdownToEnrichedTextBlock = ( - markdown: string -): EnrichedBlockText => { - const parsedMarkdown = mdParser.markdown.parse(markdown) - if (parsedMarkdown.status) { - const spans = convertMarkdownNodesToSpans(parsedMarkdown.value) - return { - type: "text", - value: spans, - parseErrors: [], - } - } else - return { - type: "text", - value: [], - parseErrors: [ - { - message: `Failed to parse markdown - expected ${parsedMarkdown.expected} at ${parsedMarkdown.index}`, - isWarning: false, - }, - ], - } -} diff --git a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.stories.tsx b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.stories.tsx index 9397766a348..314fa6b32c0 100644 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.stories.tsx +++ b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.stories.tsx @@ -3,10 +3,9 @@ import { action, computed, observable } from "mobx" import { observer } from "mobx-react" import { MarkdownTextWrap, - parsimmonToTextTokens, IRToken, + convertMarkdownToIRTokens, } from "./MarkdownTextWrap" -import { mdParser } from "./parser.js" import { TextWrap } from "../TextWrap/TextWrap.js" export default { @@ -54,11 +53,8 @@ _THE END_ } @computed get tokens(): IRToken[] { - const result = mdParser.markdown.parse(this.markdown) - if (result.status) { - return parsimmonToTextTokens(result.value.children) - } - return [] + const result = convertMarkdownToIRTokens(this.markdown) + return result } render(): JSX.Element { diff --git a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx 
b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx index 1b70e5bd10e..6344d299320 100644 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx +++ b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx @@ -1,6 +1,5 @@ import React, { CSSProperties } from "react" import { computed } from "mobx" -import { EveryMarkdownChildNode, MarkdownRoot, mdParser } from "./parser.js" import { excludeUndefined, last, @@ -13,6 +12,9 @@ import { FontFamily, } from "@ourworldindata/utils" import { TextWrap } from "../TextWrap/TextWrap.js" +import { fromMarkdown } from "mdast-util-from-markdown" +import { Root, Node, RootContentMap } from "mdast" +import { match } from "ts-pattern" const SUPERSCRIPT_NUMERALS = { "0": "\u2070", @@ -493,62 +495,62 @@ export const sumTextWrapHeights = ( sum(elements.map((element) => element.height)) + (elements.length - 1) * spacer -export function parsimmonToTextTokens( - nodes: EveryMarkdownChildNode[], - fontParams?: IRFontParams -): IRToken[] { - return nodes.map((node): IRToken => { - if (node.type === "text") { - return new IRText(node.value, fontParams) - } else if (node.type === "newline") { - return new IRLineBreak() - } else if (node.type === "whitespace") { - return new IRWhitespace(fontParams) - } else if ( - node.type === "bold" || - node.type === "plainBold" || - node.type === "boldWithoutItalic" - ) { - return new IRBold( - parsimmonToTextTokens(node.children, { - ...fontParams, - fontWeight: 700, - }) - ) - } else if ( - node.type === "italic" || - node.type === "plainItalic" || - node.type === "italicWithoutBold" - ) { - return new IRItalic( - parsimmonToTextTokens(node.children, { - ...fontParams, - isItalic: true, - }) - ) - } else if (node.type === "plainUrl") { - return new IRLink( - node.href, - parsimmonToTextTokens( - [{ type: "text", value: node.href }], - fontParams - ) - ) - } else if (node.type === "markdownLink") { - return new IRLink( - 
node.href, - parsimmonToTextTokens(node.children, fontParams) - ) - } else if (node.type === "detailOnDemand") { - return new IRDetailOnDemand( - node.term, - parsimmonToTextTokens(node.children, fontParams) - ) - } else { - throw new Error(`Unknown node type: ${(node as any).type}`) - } - }) -} +// export function parsimmonToTextTokens( +// nodes: EveryMarkdownChildNode[], +// fontParams?: IRFontParams +// ): IRToken[] { +// return nodes.map((node): IRToken => { +// if (node.type === "text") { +// return new IRText(node.value, fontParams) +// } else if (node.type === "newline") { +// return new IRLineBreak() +// } else if (node.type === "whitespace") { +// return new IRWhitespace(fontParams) +// } else if ( +// node.type === "bold" || +// node.type === "plainBold" || +// node.type === "boldWithoutItalic" +// ) { +// return new IRBold( +// parsimmonToTextTokens(node.children, { +// ...fontParams, +// fontWeight: 700, +// }) +// ) +// } else if ( +// node.type === "italic" || +// node.type === "plainItalic" || +// node.type === "italicWithoutBold" +// ) { +// return new IRItalic( +// parsimmonToTextTokens(node.children, { +// ...fontParams, +// isItalic: true, +// }) +// ) +// } else if (node.type === "plainUrl") { +// return new IRLink( +// node.href, +// parsimmonToTextTokens( +// [{ type: "text", value: node.href }], +// fontParams +// ) +// ) +// } else if (node.type === "markdownLink") { +// return new IRLink( +// node.href, +// parsimmonToTextTokens(node.children, fontParams) +// ) +// } else if (node.type === "detailOnDemand") { +// return new IRDetailOnDemand( +// node.term, +// parsimmonToTextTokens(node.children, fontParams) +// ) +// } else { +// throw new Error(`Unknown node type: ${(node as any).type}`) +// } +// }) +// } type MarkdownTextWrapProps = { text: string @@ -584,21 +586,18 @@ export class MarkdownTextWrap extends React.Component { @computed get detailsOrderedByReference(): Set { return this.props.detailsOrderedByReference || new Set() } - 
@computed get ast(): MarkdownRoot["children"] { - if (!this.text) return [] - const result = mdParser.markdown.parse(this.props.text) - if (result.status) { - return result.value.children - } - return [] - } @computed get plaintext(): string { return this.htmlLines.map(lineToPlaintext).join("\n") } + @computed get tokensFromMarkdown(): IRToken[] { + const tokens = convertMarkdownToIRTokens(this.text) + return tokens + } + @computed get htmlLines(): IRToken[][] { - const tokens = parsimmonToTextTokens(this.ast, this.fontParams) + const tokens = this.tokensFromMarkdown const lines = splitIntoLines(tokens, this.maxWidth) return lines.map(recursiveMergeTextTokens) } @@ -640,7 +639,7 @@ export class MarkdownTextWrap extends React.Component { return appendedTokens } - const tokens = parsimmonToTextTokens(this.ast, this.fontParams) + const tokens = this.tokensFromMarkdown const tokensWithReferenceNumbers = appendReferenceNumbers(tokens) return splitIntoLines(tokensWithReferenceNumbers, this.maxWidth) } @@ -740,3 +739,237 @@ function MarkdownTextWrapLine({ line }: { line: IRToken[] }): JSX.Element { ) } + +export function convertMarkdownToIRTokens(markdown: string): IRToken[] { + const ast = fromMarkdown(markdown) + return convertMarkdownRootToIRTokens(ast) +} + +function convertMarkdownRootToIRTokens(node: Root): IRToken[] { + return node.children.flatMap(convertMarkdownNodeToIRTokens) +} + +function convertMarkdownNodeToIRTokens( + node: RootContentMap[keyof RootContentMap] +): IRToken[] { + const converted = match(node) + .with( + { + type: "blockquote", + }, + (item) => { + return item.children.flatMap((child) => + convertMarkdownNodeToIRTokens(child) + ) + } + ) + .with( + { + type: "break", + }, + (item) => { + return [new IRLineBreak()] + } + ) + .with( + { + type: "code", + }, + (item) => { + return [new IRText(item.value)] + } + ) + .with( + { + type: "emphasis", + }, + (item) => { + return [ + new IRItalic( + item.children.flatMap(convertMarkdownNodeToIRTokens) + 
), + ] + } + ) + .with( + { + type: "heading", + }, + (item) => { + return item.children.flatMap(convertMarkdownNodeToIRTokens) + } + ) + .with( + { + type: "html", + }, + (item) => { + return [new IRText(item.value)] + } + ) + .with( + { + type: "image", + }, + (item) => { + return [new IRText(item.alt ?? "")] + } + ) + .with( + { + type: "inlineCode", + }, + (item) => { + return [new IRText(item.value)] + } + ) + .with( + { + type: "link", + }, + (item) => { + return [ + new IRLink( + item.url, + item.children.flatMap(convertMarkdownNodeToIRTokens) + ), + ] + } + ) + .with( + { + type: "list", + }, + (item) => { + return item.children.flatMap(convertMarkdownNodeToIRTokens) + } + ) + .with( + { + type: "listItem", + }, + (item) => { + return item.children.flatMap(convertMarkdownNodeToIRTokens) + } + ) + .with( + { + type: "paragraph", + }, + (item) => { + return item.children.flatMap(convertMarkdownNodeToIRTokens) + } + ) + .with( + { + type: "strong", + }, + (item) => { + return [ + new IRBold( + item.children.flatMap(convertMarkdownNodeToIRTokens) + ), + ] + } + ) + .with( + { + type: "text", + }, + (item) => { + return [new IRText(item.value)] + } + ) + .with( + { + type: "thematicBreak", + }, + (item) => { + return [new IRText("---")] + } + ) + .with( + { + type: "delete", + }, + (item) => { + return item.children.flatMap(convertMarkdownNodeToIRTokens) + } + ) + // Now lets finish this with blocks for FootnoteDefinition, Definition, ImageReference, LinkReference, FootnoteReference, and Table + .with( + { + type: "footnoteDefinition", + }, + (item) => { + return item.children.flatMap(convertMarkdownNodeToIRTokens) + } + ) + .with( + { + type: "definition", + }, + (item) => { + return [new IRText(`${item.identifier}: ${item.label}`)] + } + ) + .with( + { + type: "imageReference", + }, + (item) => { + return [new IRText(`${item.identifier}: ${item.label}`)] + } + ) + .with( + { + type: "linkReference", + }, + (item) => { + return [new IRText(`${item.identifier}: 
${item.label}`)] + } + ) + .with( + { + type: "footnoteReference", + }, + (item) => { + return [new IRText(`${item.identifier}: ${item.label}`)] + } + ) + .with( + { + type: "table", + }, + (item) => { + return item.children.flatMap(convertMarkdownNodeToIRTokens) + } + ) + .with( + { + type: "tableCell", + }, + (item) => { + return item.children.flatMap(convertMarkdownNodeToIRTokens) + } + ) + // and now TableRow and Yaml + .with( + { + type: "tableRow", + }, + (item) => { + return item.children.flatMap(convertMarkdownNodeToIRTokens) + } + ) + .with( + { + type: "yaml", + }, + (item) => { + return [new IRText(item.value)] + } + ) + .exhaustive() + return converted +} diff --git a/packages/@ourworldindata/components/src/MarkdownTextWrap/parser.test.ts b/packages/@ourworldindata/components/src/MarkdownTextWrap/parser.test.ts deleted file mode 100644 index 4f87a89b3f0..00000000000 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/parser.test.ts +++ /dev/null @@ -1,1037 +0,0 @@ -import { mdParser } from "./parser" - -describe("mdast parsers", () => { - it("mdParser works for non-link brackets", () => { - expect(mdParser.markdown.parse("[some text]")).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "text", - value: "[", - }, - { - type: "text", - value: "some", - }, - { - type: "whitespace", - }, - - { - type: "text", - value: "text", - }, - { - type: "text", - value: "]", - }, - ], - }, - }) - }) - it("mdParser works for funky characters in dod texts", () => { - expect( - mdParser.markdown.parse("[int.$ *?=šŸ˜›Ā§&/%Ć¼ā‚¬](#dod:term)") - ).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "detailOnDemand", - children: [ - { - type: "text", - value: "int.$", - }, - { - type: "whitespace", - }, - { - type: "text", - value: "*?=šŸ˜›Ā§&/%Ć¼ā‚¬", - }, - ], - term: "term", - }, - ], - }, - }) - }) - it("mdParser can parse a word", () => { - 
expect(mdParser.markdown.parse("word")).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "text", - value: "word", - }, - ], - }, - }) - }) - it("mdParser can parse words with punctuation", () => { - expect(mdParser.markdown.parse("can't?")).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "text", - value: "can't?", - }, - ], - }, - }) - - expect(mdParser.markdown.parse("'mid-west'")).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "text", - value: "'mid-west'", - }, - ], - }, - }) - }) - it("mdParser can parse a word with bold", () => { - expect(mdParser.markdown.parse("**I'm bold as brass**")).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "bold", - children: [ - { type: "text", value: "I'm" }, - { type: "whitespace" }, - { type: "text", value: "bold" }, - { type: "whitespace" }, - { type: "text", value: "as" }, - { type: "whitespace" }, - { type: "text", value: "brass" }, - ], - }, - ], - }, - }) - }) - it("mdParser can parse a phrase with italics", () => { - expect(mdParser.markdown.parse("_Mamma mia!_")).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "italic", - children: [ - { type: "text", value: "Mamma" }, - { type: "whitespace" }, - { type: "text", value: "mia!" 
}, - ], - }, - ], - }, - }) - }) - it("mdParser can parse URLs", () => { - expect(mdParser.markdown.parse("www.google.com")).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "plainUrl", - href: "www.google.com", - }, - ], - }, - }) - expect(mdParser.markdown.parse("[test](www.google.com)")).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "markdownLink", - children: [{ type: "text", value: "test" }], - href: "www.google.com", - }, - ], - }, - }) - }) - it("can parse markdown links with relative URLs", () => { - expect(mdParser.markdown.parse("[about us](/about-us)")).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - children: [ - { - type: "text", - value: "about", - }, - { - type: "whitespace", - }, - { - type: "text", - value: "us", - }, - ], - href: "/about-us", - type: "markdownLink", - }, - ], - }, - }) - expect(mdParser.markdown.parse("[test](www.google.com)")).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "markdownLink", - children: [{ type: "text", value: "test" }], - href: "www.google.com", - }, - ], - }, - }) - }) - - it("mdParser can parse detail on demand syntax", () => { - expect(mdParser.markdown.parse("[**dod**](#dod:thing)")).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "detailOnDemand", - children: [ - { - type: "plainBold", - children: [ - { - type: "text", - value: "dod", - }, - ], - }, - ], - term: "thing", - }, - ], - }, - }) - expect( - mdParser.markdown.parse("[a dod with multiple words](#dod:thing)") - ).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "detailOnDemand", - children: [ - { - type: "text", - value: "a", - }, - { type: "whitespace" }, - { - type: "text", - value: "dod", - }, - { type: "whitespace" }, - { - type: "text", - value: "with", - }, - { type: "whitespace" }, - { - type: "text", - value: 
"multiple", - }, - { type: "whitespace" }, - { - type: "text", - value: "words", - }, - ], - term: "thing", - }, - ], - }, - }) - }) - it("mdParser can parse words and newlines", () => { - expect( - mdParser.markdown.parse(`hello - -how **are** you?`) - ).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "text", - value: "hello", - }, - { - type: "newline", - }, - { - type: "newline", - }, - - { - type: "text", - value: "how", - }, - { type: "whitespace" }, - { - children: [ - { - type: "text", - value: "are", - }, - ], - type: "bold", - }, - { type: "whitespace" }, - - { - type: "text", - value: "you?", - }, - ], - }, - }) - }) - - it("mdParser can parse nested bold and italics", () => { - expect( - mdParser.markdown.parse( - "Hello _I am italicized and **I am bolded and italicized**_" - ) - ).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "text", - value: "Hello", - }, - { type: "whitespace" }, - { - children: [ - { - type: "text", - value: "I", - }, - { type: "whitespace" }, - { - type: "text", - value: "am", - }, - { type: "whitespace" }, - { - type: "text", - value: "italicized", - }, - { type: "whitespace" }, - { - type: "text", - value: "and", - }, - { type: "whitespace" }, - { - children: [ - { - type: "text", - value: "I", - }, - { type: "whitespace" }, - { - type: "text", - value: "am", - }, - { type: "whitespace" }, - { - type: "text", - value: "bolded", - }, - { type: "whitespace" }, - { - type: "text", - value: "and", - }, - { type: "whitespace" }, - { - type: "text", - value: "italicized", - }, - ], - type: "boldWithoutItalic", - }, - ], - type: "italic", - }, - ], - }, - }) - }) - - it("mdParser can parse links inside bold and italics", () => { - expect( - mdParser.markdown.parse( - "**_[bold and italic](www.ourworldindata.org)_**" - ) - ).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - children: [ - { - children: [ - { - children: [ - { - 
type: "text", - value: "bold", - }, - { type: "whitespace" }, - { - type: "text", - value: "and", - }, - { type: "whitespace" }, - { - type: "text", - value: "italic", - }, - ], - href: "www.ourworldindata.org", - type: "markdownLink", - }, - ], - type: "italicWithoutBold", - }, - ], - type: "bold", - }, - ], - }, - }) - - expect( - mdParser.markdown.parse("_**[italic and bold](www.google.com)**_") - ).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - children: [ - { - children: [ - { - children: [ - { - type: "text", - value: "italic", - }, - { - type: "whitespace", - }, - { - type: "text", - value: "and", - }, - { - type: "whitespace", - }, - { - type: "text", - value: "bold", - }, - ], - href: "www.google.com", - type: "markdownLink", - }, - ], - type: "boldWithoutItalic", - }, - ], - type: "italic", - }, - ], - }, - }) - }) - - it("mdParser can parse details on demand inside bold", () => { - expect( - mdParser.markdown.parse( - "**[an _italicized_ detail on demand](#dod:monad)**" - ) - ).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - children: [ - { - children: [ - { - type: "text", - value: "an", - }, - { - type: "whitespace", - }, - { - children: [ - { - type: "text", - value: "italicized", - }, - ], - type: "plainItalic", - }, - { - type: "whitespace", - }, - { - type: "text", - value: "detail", - }, - { - type: "whitespace", - }, - { - type: "text", - value: "on", - }, - { - type: "whitespace", - }, - { - type: "text", - value: "demand", - }, - ], - term: "monad", - type: "detailOnDemand", - }, - ], - type: "bold", - }, - ], - }, - }) - }) - - it("mdParser can parse words adjacent to bold", () => { - expect(mdParser.markdown.parse("**bold**-word")).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "bold", - children: [{ type: "text", value: "bold" }], - }, - { type: "text", value: "-word" }, - ], - }, - }) - }) - - it("Parser can parse bold starting and 
stopping inside a word", () => { - expect(mdParser.markdown.parse("test**some**postfix")).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "text", - value: "test", - }, - { - type: "bold", - children: [ - { - type: "text", - value: "some", - }, - ], - }, - { - type: "text", - value: "postfix", - }, - ], - }, - }) - }) - - it("parses unfinished bold correctly as text", () => { - expect(mdParser.markdown.parse("** unfinished bold")).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "text", - value: "**", - }, - - { - type: "whitespace", - }, - - { - type: "text", - value: "unfinished", - }, - - { - type: "whitespace", - }, - - { - type: "text", - value: "bold", - }, - ], - }, - }) - }) - - it("parses unfinished bold with finished italic correctly", () => { - expect( - mdParser.markdown.parse("** unfinished bold _ italic _") - ).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "text", - value: "**", - }, - { - type: "whitespace", - }, - - { - type: "text", - value: "unfinished", - }, - - { - type: "whitespace", - }, - - { - type: "text", - value: "bold", - }, - - { - type: "whitespace", - }, - { - type: "italic", - children: [ - { - type: "whitespace", - }, - { - type: "text", - value: "italic", - }, - { - type: "whitespace", - }, - ], - }, - ], - }, - }) - }) - - it("parses nested-in-bold, non-spaced italics", () => { - expect(mdParser.markdown.parse("**one-_two_-three**")).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - children: [ - { - type: "text", - value: "one-", - }, - { - children: [ - { - type: "text", - value: "two", - }, - ], - type: "italicWithoutBold", - }, - { - type: "text", - value: "-three", - }, - ], - type: "bold", - }, - ], - }, - }) - }) - - it("parses nested-in-italic, non-spaced bold", () => { - expect(mdParser.markdown.parse("_one-**two**-three_")).toEqual({ - status: true, - value: { - type: 
"MarkdownRoot", - children: [ - { - children: [ - { - type: "text", - value: "one-", - }, - { - children: [ - { - type: "text", - value: "two", - }, - ], - type: "boldWithoutItalic", - }, - { - type: "text", - value: "-three", - }, - ], - type: "italic", - }, - ], - }, - }) - }) - - it("parses markdown links with just bold or just italic correctly and ignores nested bold/italic", () => { - expect( - mdParser.markdown.parse( - "[A **bold** _italic **nonnested**_ link](https://owid.io/test)" - ) - ).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "markdownLink", - children: [ - { - type: "text", - value: "A", - }, - { - type: "whitespace", - }, - { - type: "plainBold", - children: [ - { - type: "text", - value: "bold", - }, - ], - }, - { - type: "whitespace", - }, - { - type: "plainItalic", - children: [ - { - type: "text", - value: "italic", - }, - { - type: "whitespace", - }, - { - type: "text", - value: "**nonnested**", - }, - ], - }, - { - type: "whitespace", - }, - { - type: "text", - value: "link", - }, - ], - href: "https://owid.io/test", - }, - ], - }, - }) - }) - it("Parses nonbreaking spaces as text", () => { - expect(mdParser.markdown.parse("text with nonbreakingĀ space")).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "text", - value: "text", - }, - { - type: "whitespace", - }, - { - type: "text", - value: "with", - }, - { - type: "whitespace", - }, - { - type: "text", - value: "nonbreaking", - }, - { - type: "text", - value: "Ā ", - }, - { - type: "text", - value: "space", - }, - ], - }, - }) - }) - it("Parses whitespace preceding a newline", () => { - const input = - "this-line-ends-with-a-space" + - " " + - "\n" + - "but-the-newline-should-be-tracked-separately" - expect(mdParser.markdown.parse(input)).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "text", - value: "this-line-ends-with-a-space", - }, - { - type: "whitespace", - }, - { 
- type: "newline", - }, - { - type: "text", - value: "but-the-newline-should-be-tracked-separately", - }, - ], - }, - }) - }) - it("Parses newlines surrounded by whitespace", () => { - const input = - "this-line-ends-with-a-space" + - " " + - "\n\n" + - " " + - "but-the-newline-should-be-tracked-separately" - expect(mdParser.markdown.parse(input)).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "text", - value: "this-line-ends-with-a-space", - }, - { - type: "whitespace", - }, - { - type: "newline", - }, - { - type: "newline", - }, - { - type: "whitespace", - }, - { - type: "text", - value: "but-the-newline-should-be-tracked-separately", - }, - ], - }, - }) - }) - it("parses link inside brackets", () => { - expect(mdParser.markdown.parse("[[link](www.google.com)]")).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "text", - value: "[", - }, - { - type: "markdownLink", - children: [ - { - type: "text", - value: "link", - }, - ], - href: "www.google.com", - }, - { - type: "text", - value: "]", - }, - ], - }, - }) - }) - it("parses link inside parentheses", () => { - expect(mdParser.markdown.parse("([link](www.google.com))")).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "text", - value: "(", - }, - { - type: "markdownLink", - children: [ - { - type: "text", - value: "link", - }, - ], - href: "www.google.com", - }, - { - type: "text", - value: ")", - }, - ], - }, - }) - }) - it("parses parens inside link inside parentheses", () => { - expect(mdParser.markdown.parse("([l(i)nk](www.google.com))")).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "text", - value: "(", - }, - { - type: "markdownLink", - children: [ - { - type: "text", - value: "l(i)nk", - }, - ], - href: "www.google.com", - }, - { - type: "text", - value: ")", - }, - ], - }, - }) - }) - - it("parses parens next to closing bold sequence", () => { - 
expect(mdParser.markdown.parse("**Our World in Data (OWID)**")).toEqual( - { - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "bold", - children: [ - { type: "text", value: "Our" }, - { type: "whitespace" }, - { type: "text", value: "World" }, - { type: "whitespace" }, - { type: "text", value: "in" }, - { type: "whitespace" }, - { type: "text", value: "Data" }, - { type: "whitespace" }, - { type: "text", value: "(" }, - { type: "text", value: "OWID" }, - { type: "text", value: ")" }, - ], - }, - ], - }, - } - ) - }) - - it("parses too many underscores as text", () => { - expect(mdParser.markdown.parse("____abc__")).toEqual({ - status: true, - value: { - type: "MarkdownRoot", - children: [ - { - type: "text", - value: "____abc__", - }, - ], - }, - }) - }) -}) diff --git a/packages/@ourworldindata/components/src/MarkdownTextWrap/parser.ts b/packages/@ourworldindata/components/src/MarkdownTextWrap/parser.ts deleted file mode 100644 index e34a114b993..00000000000 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/parser.ts +++ /dev/null @@ -1,572 +0,0 @@ -import P from "parsimmon" -import { detailOnDemandRegex } from "@ourworldindata/utils" -// An AST inspired by MDAST -// Deviates because we want to track individual words, whitespace, and newlines to use with MarkdownTextWrap and our SVG exporter - -// How this parser works - -// This parser uses the parsimmon javascript library that implements a monadic parser combinator. -// We considered forking simple-markdown, but went with combinators for easier maintenance. -// You can think of a parser in this context as a generic class that parses an object of the type -// that is specified as the type parameter of the class. parser combinators build up more complex -// parsers by combining smaller parsers, mostly by specifying either sequences or alternatives. 
-// Alternatives (P.alt()) try a list of given parsers one at a time, backtracking when parsing with -// a given parser doesn't work and trying the next one. - -// Because of this it is important to consider the order of parsers in alternatives and to make -// sure that nested parsers fail when they should (e.g. when you have an opening ** for bold it -// is important that you make sure that you find a matching ** at the end in instead of -// accidentally consuming ** with a very generic parser that takes any token and that would -// then not let you match this end string fragment and close the bold tag). - -// By and large this parser tries to define a special type for every individual parser plus a -// parsing function. We don't really care about some of the differences (e.g. Text and NonBracketWord -// have to be parsed differently but result in the same shape of data). To make things more -// consistent though, every parser has it's own type name, even if they rather often just alias -// to Text. This should make it easy in the future to switch more parsed types to actual concrete -// types if we need a richer AST for some reason. - -// Parsing bold and italic in markdown is a bit more involved than most parsing jobs for actual -// programming languages that try harder to be parseable with a context free grammar. Consider -// that bold and italic can be nested in each other but it doesn't really make sense to nest -// bold in italic in bold (and this would create annoying ambiguity). 
For this reason this parser -// is quite explicit and has 3 different kinds of bold and italic: -// * one that can contain only contain text, whitespace and newlines -// * one that can also contain Urls, markdown links and Details on Demand but not other italic or bold -// * and finally one for the top level that can also contain the other one (bold that can have -// italic or italic that can have bold content) but in a non-nestable way - -// This might be overkill for our current needs but I wanted to err on the side of making the -// parser strict and precisise now to avoid weird ambiguities in the future. - -//#region Parser types - -// The default interface for nodes that (for now) we don't want to track as a special type -interface Text { - type: "text" - value: string -} - -// A special literal that we use when building lines with TextWrap -interface Newline { - type: "newline" -} - -// Another literal that's needed to know when to reinsert spaces (e.g. "**one**-two" versus "**one** -two") -interface Whitespace { - type: "whitespace" -} - -interface PlainUrl { - type: "plainUrl" - href: string -} - -type NonBracketWord = Text - -type NonParensWord = Text - -type NonSingleUnderscoreWord = Text - -type NonDoubleColonOrParensWord = Text - -type NonDoubleStarWord = Text - -type MarkdownLinkContent = Whitespace | Newline | NonBracketWord - -type DodCategory = Text - -type DodTerm = Text - -interface MarkdownLink { - type: "markdownLink" - children: MarkdownLinkContent[] - href: string -} - -type DetailsOnDemandContent = - | Whitespace - | Newline - | PlainItalic - | PlainBold - | NonBracketWord - -interface DetailOnDemand { - type: "detailOnDemand" - term: string - children: DetailsOnDemandContent[] -} - -type BoldWithoutItalicContent = - | Whitespace - | Newline - | PlainUrl - | MarkdownLink - | DetailOnDemand - | NonDoubleStarWord - -interface BoldWithoutItalic { - type: "boldWithoutItalic" - children: BoldWithoutItalicContent[] -} - -type BoldContent = - | 
ItalicWithoutBold - | Whitespace - | Newline - | PlainUrl - | MarkdownLink - | DetailOnDemand - | NonDoubleStarWord - -interface Bold { - type: "bold" - children: BoldContent[] -} - -type PlainBoldContent = Whitespace | Newline | NonDoubleStarWord - -interface PlainBold { - type: "plainBold" - children: PlainBoldContent[] -} - -type ItalicWithoutBoldContent = - | Whitespace - | Newline - | PlainUrl - | MarkdownLink - | DetailOnDemand - | NonSingleUnderscoreWord - -interface ItalicWithoutBold { - type: "italicWithoutBold" - children: ItalicWithoutBoldContent[] -} -type ItalicContent = - | BoldWithoutItalic - | Whitespace - | Newline - | PlainUrl - | MarkdownLink - | DetailOnDemand - | NonSingleUnderscoreWord - -interface Italic { - type: "italic" - children: ItalicContent[] -} - -type PlainItalicContent = Whitespace | Newline | NonSingleUnderscoreWord - -interface PlainItalic { - type: "plainItalic" - children: PlainItalicContent[] -} - -// TextSegment is used when we need to break up a string of non-whitespace characters -// into multiple segments because it may have "formatting tmesis" -// e.g. 
abso_freaking_lutely -type TextSegment = Bold | Italic | Text - -interface TextSegments { - type: "textSegments" - children: TextSegment[] -} - -export interface MarkdownRoot { - type: "MarkdownRoot" - children: Array -} - -type languagePartsType = typeof languageParts - -type MdParser = { - [P in keyof languagePartsType]: ReturnType -} - -export type EveryMarkdownRootNode = - | Newline - | Whitespace - | DetailOnDemand - | MarkdownLink - | PlainUrl - | Bold - | PlainBold - | Italic - | PlainItalic - | TextSegments - | Text - -// Every possible child of a MarkdownRoot node -export type EveryMarkdownChildNode = - | TextSegments - | NonSingleUnderscoreWord - | Bold - | BoldContent - | PlainBold - | Italic - | ItalicContent - | PlainItalic - -export type EveryMarkdownNode = - | EveryMarkdownChildNode - | EveryMarkdownRootNode - | BoldWithoutItalic - | ItalicWithoutBold - | NonSingleUnderscoreWord -// #endregion - -//#region Terminal parsers -const wordWithoutParensParser = (): P.Parser => - P.regex(/[^\s\(\)\[\]]+/).map((val) => ({ type: "text", value: val })) - -const singleParenParser = (): P.Parser => - P.regex(/[\(\)\[\]]/).map((val) => ({ type: "text", value: val })) - -const newlineParser = (): P.Parser => - P.regex(/\n/).result({ type: "newline" }) - -const nonbreakingSpaceParser = (): P.Parser => - // According to https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Character_Classes - // the \s character class includes the following codepoints: [ \f\n\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff] - // We want to treat newlines and nonbreaking spaces specially. 
Out of the list above, the codepoints u+00a0 and u+202f look like - // they should be treated as non-breaking whitespace - P.regex(/[\u00a0\ufeff]+/).map((val) => ({ type: "text", value: val })) - -// Also based on that MDN article, we don't want to consume newlines when we're looking for spaces and tabs -// " \n" should turn into [{ type: "whitespace" }, { type: "newline" }] -const nonNewlineWhitespaceParser = (): P.Parser => - P.regex( - /[\r\t\f\v \u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]+/ - ).result({ type: "whitespace" }) - -const plainUrlParser = (): P.Parser => - P.regex(urlRegex).map((result) => ({ - type: "plainUrl", - href: result, - })) - -// https://urlregex.com -const urlRegex = - /((([A-Za-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)((?:\/[\+~:%\/\.\w\-_]*)?\??(?:[\-\+=&;%@~:\.\w_]*)#?(?:[\.\!\/\\\w\-]*))?)/ - -const nonBracketWordParser: (r: MdParser) => P.Parser = () => - P.regex(/[^\[\]\s]+/).map((val) => ({ type: "text", value: val })) // no brackets, no WS - -const nonParensWordParser: (r: MdParser) => P.Parser = () => - P.regex(/[^\(\)\s]+/).map((val) => ({ type: "text", value: val })) // no parens, no WS - -const nonDoubleColonOrParensWordParser: ( - r: MdParser -) => P.Parser = () => - P.regex(/([^\(\):\s]|:(?!:))+/).map((val) => ({ type: "text", value: val })) // no parens, no WS, no :: - -const nonSingleUnderscoreWordParser: ( - r: MdParser -) => P.Parser = () => - P.regex(/[^_\s]+/).map((val) => ({ type: "text", value: val })) // no WS, no _ - -const nonDoubleStarWordParser: ( - r: MdParser -) => P.Parser = () => - P.regex(/([^*\s]|\*(?!\*))+/).map((val) => ({ type: "text", value: val })) // no WS, no ** - -const nonSpecialCharactersParser: (r: MdParser) => P.Parser = () => - P.regex(/[^\s*_\(\)\[\]]+/).map((value) => ({ type: "text", value })) // Consume up to one of *_()[] - -const dodCategoryParser: (r: MdParser) => P.Parser = () => - 
P.regex(/([^\(\):\s]|:(?!:))+/).map((val) => ({ - type: "text", - value: val, - })) // no WS, no parens, no :: - -const dodTermParser: (r: MdParser) => P.Parser = () => - P.regex(/([^\(\):\s]|:(?!:))+/).map((val) => ({ - type: "text", - value: val, - })) // no WS, no parens, no :: - -//#endregion - -//#region Higher level parsers - -const markdownLinkContentParser: ( - r: MdParser -) => P.Parser = (r: MdParser) => - P.alt( - r.newline, - r.nonbreakingSpace, - r.nonNewlineWhitespace, - r.plainBold, - r.plainItalic, - r.nonBracketWord - ) - -const markdownLinkParser: (r: MdParser) => P.Parser = ( - r: MdParser -) => - P.seqObj<{ children: MarkdownLinkContent[]; href: string }>( - P.string("["), - [ - "children", - r.markdownLinkContent /* as P.Parser */ - .atLeast(1), - ], - P.string("]("), - ["href", P.alt(P.regex(/\/[\w\-]+/), P.regex(urlRegex))], - P.string(")") - ).map(({ children, href }) => ({ - type: "markdownLink", - children, - href, - })) - -const detailOnDemandContentParser: ( - r: MdParser -) => P.Parser = (r: MdParser) => - P.alt( - // In TS 4.7 parsimmon could type the parser as Covariant on its type parameter which would remove the need for these casts - r.newline, - r.nonbreakingSpace, - r.nonNewlineWhitespace, - r.plainBold, - r.plainItalic, - r.nonBracketWord - ) - -export function extractDetailsFromSyntax(str: string): string[] { - return [...str.matchAll(new RegExp(detailOnDemandRegex, "g"))].map( - ([_, term]) => term - ) -} - -const detailOnDemandParser: (r: MdParser) => P.Parser = ( - r: MdParser -) => - P.seqObj<{ - category: Text - term: Text - children: DetailsOnDemandContent[] - }>( - P.string("["), - ["children", r.detailOnDemandContent.atLeast(1)], - P.string("](#dod:"), - ["term", r.dodTerm], - P.string(")") - ).map(({ children, term }) => ({ - type: "detailOnDemand", - term: term.value, - children, - })) - -const boldWithoutItalicContentParser: ( - r: MdParser -) => P.Parser = (r: MdParser) => - P.alt( - r.newline, - r.nonbreakingSpace, 
- r.nonNewlineWhitespace, - r.detailOnDemand, - r.markdownLink, - r.plainUrl, - r.nonDoubleStarWord - ) - -const boldWithoutItalicParser: (r: MdParser) => P.Parser = ( - r: MdParser -) => - P.seqObj<{ children: BoldWithoutItalicContent[] }>( - P.string("**"), - ["children", r.boldWithoutItalicContent.atLeast(1)], - P.string("**") - ).map(({ children }) => ({ - type: "boldWithoutItalic", - children, - })) - -const boldContentParser: (r: MdParser) => P.Parser = ( - r: MdParser -) => - P.alt( - r.newline, - r.nonbreakingSpace, - r.nonNewlineWhitespace, - r.italicWithoutBold, - r.detailOnDemand, - r.markdownLink, - r.plainUrl, - r.nonSpecialCharacters, - r.singleParen - ) - -const boldParser: (r: MdParser) => P.Parser = (r: MdParser) => - P.seqObj<{ children: BoldContent[] }>( - P.string("**"), - ["children", r.boldContent.atLeast(1)], - P.string("**") - ).map(({ children }) => ({ - type: "bold", - children, - })) - -const plainBoldContentParser: (r: MdParser) => P.Parser = ( - r: MdParser -) => - P.alt( - r.newline, - r.nonbreakingSpace, - r.nonNewlineWhitespace, - r.nonDoubleStarWord - ) - -const plainBoldParser: (r: MdParser) => P.Parser = (r: MdParser) => - P.seqObj( - P.string("**"), - ["children", r.plainBoldContent.atLeast(1)], - P.string("**") - ).map(({ children }) => ({ - type: "plainBold", - children, - })) - -const italicWithoutBoldContentParser: ( - r: MdParser -) => P.Parser = (r: MdParser) => - P.alt( - r.newline, - r.nonbreakingSpace, - r.nonNewlineWhitespace, - r.newline, - r.detailOnDemand, - r.markdownLink, - r.plainUrl, - r.nonSpecialCharacters, - r.singleParen - ) - -const italicWithoutBoldParser: (r: MdParser) => P.Parser = ( - r: MdParser -) => - P.seqObj<{ children: ItalicWithoutBoldContent[] }>( - P.string("_"), - ["children", r.italicWithoutBoldContent.atLeast(1)], - P.string("_") - ).map(({ children }) => ({ - type: "italicWithoutBold", - children, - })) -const italicContentParser: (r: MdParser) => P.Parser = ( - r: MdParser -) => - P.alt( - 
r.newline, - r.nonbreakingSpace, - r.nonNewlineWhitespace, - r.boldWithoutItalic, - r.detailOnDemand, - r.markdownLink, - r.plainUrl, - r.nonSpecialCharacters, - r.singleParen - ) - -const italicParser: (r: MdParser) => P.Parser = (r: MdParser) => - P.seqObj( - P.string("_"), - ["children", r.italicContent.atLeast(1)], - P.string("_") - ).map(({ children }) => ({ - type: "italic", - children, - })) - -const plainItalicContentParser: ( - r: MdParser -) => P.Parser = (r: MdParser) => - P.alt( - r.newline, - r.nonbreakingSpace, - r.nonNewlineWhitespace, - r.nonSingleUnderscoreWord - ) - -const plainItalicParser: (r: MdParser) => P.Parser = ( - r: MdParser -) => - P.seqObj( - P.string("_"), - ["children", r.plainItalicContent.atLeast(1)], - P.string("_") - ).map(({ children }) => ({ - type: "plainItalic", - children, - })) - -//#endregion - -//#region Top level language construction - -const markdownParser: (r: MdParser) => P.Parser = (r) => - // The order is crucial here! - P.alt( - r.newline, - r.nonbreakingSpace, - r.nonNewlineWhitespace, - r.detailOnDemand, - r.markdownLink, - r.plainUrl, - r.bold, - r.italic, - // Consume up to ** or _, if possible - r.nonSpecialCharacters, - // Otherwise consume everything - r.wordWithoutParens, - r.singleParen - ) - .atLeast(1) - .map( - (tokens): MarkdownRoot => ({ - type: "MarkdownRoot", - children: tokens, - }) - ) - -const languageParts = { - markdown: markdownParser, - newline: newlineParser, - nonbreakingSpace: nonbreakingSpaceParser, - nonNewlineWhitespace: nonNewlineWhitespaceParser, - detailOnDemand: detailOnDemandParser, - markdownLink: markdownLinkParser, - plainUrl: plainUrlParser, - bold: boldParser, - italic: italicParser, - plainBold: plainBoldParser, - plainItalic: plainItalicParser, - wordWithoutParens: wordWithoutParensParser, - singleParen: singleParenParser, - // Utility parsers below - these will never be tried on the top level because text covers everything else - detailOnDemandContent: 
detailOnDemandContentParser, - markdownLinkContent: markdownLinkContentParser, - boldContent: boldContentParser, - plainBoldContent: plainBoldContentParser, - boldWithoutItalic: boldWithoutItalicParser, - boldWithoutItalicContent: boldWithoutItalicContentParser, - plainItalicContent: plainItalicContentParser, - italicContent: italicContentParser, - italicWithoutBold: italicWithoutBoldParser, - italicWithoutBoldContent: italicWithoutBoldContentParser, - nonBracketWord: nonBracketWordParser, - nonParensWord: nonParensWordParser, - nonDoubleColonOrParensWord: nonDoubleColonOrParensWordParser, - nonDoubleStarWord: nonDoubleStarWordParser, - nonSpecialCharacters: nonSpecialCharactersParser, - nonSingleUnderscoreWord: nonSingleUnderscoreWordParser, - dodCategory: dodCategoryParser, - dodTerm: dodTermParser, -} as const - -export const mdParser: MdParser = P.createLanguage(languageParts) - -//#endregion diff --git a/packages/@ourworldindata/components/src/index.ts b/packages/@ourworldindata/components/src/index.ts index 3ba23fb1bda..f9ba2f288ef 100644 --- a/packages/@ourworldindata/components/src/index.ts +++ b/packages/@ourworldindata/components/src/index.ts @@ -6,22 +6,11 @@ export { } from "./MarkdownTextWrap/MarkdownTextWrap.js" export { SimpleMarkdownText } from "./SimpleMarkdownText.js" - -export { - extractDetailsFromSyntax, - mdParser, - type MarkdownRoot, - type EveryMarkdownChildNode, - type EveryMarkdownNode, - type EveryMarkdownRootNode, -} from "./MarkdownTextWrap/parser.js" - export { getLinkType, getUrlTarget, checkIsInternalLink, convertHeadingTextToId, - markdownToEnrichedTextBlock, } from "./GdocsUtils.js" export { ExpandableToggle } from "./ExpandableToggle/ExpandableToggle.js" diff --git a/yarn.lock b/yarn.lock index ebeef571178..5ac3cd34531 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4163,6 +4163,15 @@ __metadata: languageName: node linkType: hard +"@types/debug@npm:^4.0.0": + version: 4.1.10 + resolution: "@types/debug@npm:4.1.10" + dependencies: + 
"@types/ms": "npm:*" + checksum: 938f79c5b610f851da9c67ecd8641a09b33ce9cb38fe4c9f4d20ee743d6bccb5d8e9a833a4cd23e0684a316622af67a0634fa706baea5a01f5219961d1976314 + languageName: node + linkType: hard + "@types/dom-speech-recognition@npm:^0.0.1": version: 0.0.1 resolution: "@types/dom-speech-recognition@npm:0.0.1" @@ -4431,6 +4440,15 @@ __metadata: languageName: node linkType: hard +"@types/mdast@npm:^4.0.0, @types/mdast@npm:^4.0.2": + version: 4.0.2 + resolution: "@types/mdast@npm:4.0.2" + dependencies: + "@types/unist": "npm:*" + checksum: cb2915c71eb94319880a0185459cac89b73f14f903998e1b088055cc51b71de91d7c76bda46c584e5d9c2d21d817680a3b2de56ebb737c14c641f81ad30028ed + languageName: node + linkType: hard + "@types/mime@npm:^1": version: 1.3.2 resolution: "@types/mime@npm:1.3.2" @@ -4459,6 +4477,13 @@ __metadata: languageName: node linkType: hard +"@types/ms@npm:*": + version: 0.7.33 + resolution: "@types/ms@npm:0.7.33" + checksum: 2cb5af611ace05ab2ae40422c8539850cf983197982bb04b83acf59e6e692e2faccf336a82ac4db97f7ea28f2baa0a8990fa5eb1cd72c5cab17b5b3609b0b650 + languageName: node + linkType: hard + "@types/mysql@npm:^2.15.21": version: 2.15.21 resolution: "@types/mysql@npm:2.15.21" @@ -4826,6 +4851,13 @@ __metadata: languageName: node linkType: hard +"@types/unist@npm:*, @types/unist@npm:^3.0.0": + version: 3.0.1 + resolution: "@types/unist@npm:3.0.1" + checksum: 5692b9b271724d4ab6434f125350436bb35eb8ffdaeb5b762c7282870be9562f364bc49016e4b25afb442b68ad9ee042a262c093af07738af6b8a9b858335e37 + languageName: node + linkType: hard + "@types/unist@npm:^2, @types/unist@npm:^2.0.0, @types/unist@npm:^2.0.2, @types/unist@npm:^2.0.3": version: 2.0.9 resolution: "@types/unist@npm:2.0.9" @@ -6478,6 +6510,13 @@ __metadata: languageName: node linkType: hard +"character-entities@npm:^2.0.0": + version: 2.0.2 + resolution: "character-entities@npm:2.0.2" + checksum: 
c8dd1f4bf1a92fccf7d2fad9673660a88b37854557d30f6076c32fedfb92d1420208298829ff1d3b6b4fa1c7012e8326c45e7f5c3ed1e9a09ec177593c521b2f + languageName: node + linkType: hard + "character-reference-invalid@npm:^1.0.0": version: 1.1.4 resolution: "character-reference-invalid@npm:1.1.4" @@ -7889,6 +7928,15 @@ __metadata: languageName: node linkType: hard +"decode-named-character-reference@npm:^1.0.0": + version: 1.0.2 + resolution: "decode-named-character-reference@npm:1.0.2" + dependencies: + character-entities: "npm:^2.0.0" + checksum: f4c71d3b93105f20076052f9cb1523a22a9c796b8296cd35eef1ca54239c78d182c136a848b83ff8da2071e3ae2b1d300bf29d00650a6d6e675438cc31b11d78 + languageName: node + linkType: hard + "decode-uri-component@npm:^0.2.0": version: 0.2.2 resolution: "decode-uri-component@npm:0.2.2" @@ -8053,6 +8101,13 @@ __metadata: languageName: node linkType: hard +"dequal@npm:^2.0.0": + version: 2.0.3 + resolution: "dequal@npm:2.0.3" + checksum: 6ff05a7561f33603df87c45e389c9ac0a95e3c056be3da1a0c4702149e3a7f6fe5ffbb294478687ba51a9e95f3a60e8b6b9005993acd79c292c7d15f71964b6b + languageName: node + linkType: hard + "destroy@npm:1.2.0": version: 1.2.0 resolution: "destroy@npm:1.2.0" @@ -8088,6 +8143,15 @@ __metadata: languageName: node linkType: hard +"devlop@npm:^1.0.0": + version: 1.1.0 + resolution: "devlop@npm:1.1.0" + dependencies: + dequal: "npm:^2.0.0" + checksum: 3cc5f903d02d279d6dc4aa71ab6ed9898b9f4d1f861cc5421ce7357893c21b9520de78afb203c92bd650a6977ad0ca98195453a0707a39958cf5fea3b0a8ddd8 + languageName: node + linkType: hard + "diff-sequences@npm:^29.6.3": version: 29.6.3 resolution: "diff-sequences@npm:29.6.3" @@ -10433,6 +10497,7 @@ __metadata: "@types/jsonwebtoken": "npm:^9.0.0" "@types/lodash": "npm:^4.14.185" "@types/md5": "npm:^2.3.2" + "@types/mdast": "npm:^4.0.2" "@types/minimist": "npm:^1.2.2" "@types/mousetrap": "npm:^1.6.9" "@types/mysql": "npm:^2.15.21" @@ -10517,6 +10582,7 @@ __metadata: lodash: "npm:^4.17.20" mathjax-full: "npm:^3.1.0" md5: "npm:^2.3.0" + 
mdast-util-from-markdown: "npm:^2.0.0" minimist: "npm:^1.2.6" mobx: "npm:^5.15.7" mobx-formatters: "npm:^1.0.2" @@ -13608,6 +13674,26 @@ __metadata: languageName: node linkType: hard +"mdast-util-from-markdown@npm:^2.0.0": + version: 2.0.0 + resolution: "mdast-util-from-markdown@npm:2.0.0" + dependencies: + "@types/mdast": "npm:^4.0.0" + "@types/unist": "npm:^3.0.0" + decode-named-character-reference: "npm:^1.0.0" + devlop: "npm:^1.0.0" + mdast-util-to-string: "npm:^4.0.0" + micromark: "npm:^4.0.0" + micromark-util-decode-numeric-character-reference: "npm:^2.0.0" + micromark-util-decode-string: "npm:^2.0.0" + micromark-util-normalize-identifier: "npm:^2.0.0" + micromark-util-symbol: "npm:^2.0.0" + micromark-util-types: "npm:^2.0.0" + unist-util-stringify-position: "npm:^4.0.0" + checksum: 960e28a8ff3d989cc25a615d14e9a1d95d145b938dc08323ce44689be6dd052ece544d2acf5242cedb8ad6ccdc3ffe854989b7c2516c6e62f2fca42b6d11a2da + languageName: node + linkType: hard + "mdast-util-to-hast@npm:^10.2.0": version: 10.2.0 resolution: "mdast-util-to-hast@npm:10.2.0" @@ -13631,6 +13717,15 @@ __metadata: languageName: node linkType: hard +"mdast-util-to-string@npm:^4.0.0": + version: 4.0.0 + resolution: "mdast-util-to-string@npm:4.0.0" + dependencies: + "@types/mdast": "npm:^4.0.0" + checksum: f4a5dbb9ea03521d7d3e26a9ba5652a1d6fbd55706dddd2155427517085688830e0ecd3f12418cfd40892640886eb39a4034c3c967d85e01e2fa64cfb53cff05 + languageName: node + linkType: hard + "mdn-data@npm:2.0.28": version: 2.0.28 resolution: "mdn-data@npm:2.0.28" @@ -13713,6 +13808,242 @@ __metadata: languageName: node linkType: hard +"micromark-core-commonmark@npm:^2.0.0": + version: 2.0.0 + resolution: "micromark-core-commonmark@npm:2.0.0" + dependencies: + decode-named-character-reference: "npm:^1.0.0" + devlop: "npm:^1.0.0" + micromark-factory-destination: "npm:^2.0.0" + micromark-factory-label: "npm:^2.0.0" + micromark-factory-space: "npm:^2.0.0" + micromark-factory-title: "npm:^2.0.0" + 
micromark-factory-whitespace: "npm:^2.0.0" + micromark-util-character: "npm:^2.0.0" + micromark-util-chunked: "npm:^2.0.0" + micromark-util-classify-character: "npm:^2.0.0" + micromark-util-html-tag-name: "npm:^2.0.0" + micromark-util-normalize-identifier: "npm:^2.0.0" + micromark-util-resolve-all: "npm:^2.0.0" + micromark-util-subtokenize: "npm:^2.0.0" + micromark-util-symbol: "npm:^2.0.0" + micromark-util-types: "npm:^2.0.0" + checksum: 67f6e2f062f42a7ae21e8a409f3663843703a830ff27cf0f41cb0fb712c58e55409db428531d8124c4ef8d698cd81e7eb41485d24b8c352d2f0c06b535865367 + languageName: node + linkType: hard + +"micromark-factory-destination@npm:^2.0.0": + version: 2.0.0 + resolution: "micromark-factory-destination@npm:2.0.0" + dependencies: + micromark-util-character: "npm:^2.0.0" + micromark-util-symbol: "npm:^2.0.0" + micromark-util-types: "npm:^2.0.0" + checksum: d36e65ed1c072ff4148b016783148ba7c68a078991154625723e24bda3945160268fb91079fb28618e1613c2b6e70390a8ddc544c45410288aa27b413593071a + languageName: node + linkType: hard + +"micromark-factory-label@npm:^2.0.0": + version: 2.0.0 + resolution: "micromark-factory-label@npm:2.0.0" + dependencies: + devlop: "npm:^1.0.0" + micromark-util-character: "npm:^2.0.0" + micromark-util-symbol: "npm:^2.0.0" + micromark-util-types: "npm:^2.0.0" + checksum: c021dbd0ed367610d35f2bae21209bc804d1a6d1286ffce458fd6a717f4d7fe581a7cba7d5c2d7a63757c44eb927c80d6a571d6ea7969fae1b48ab6461d109c4 + languageName: node + linkType: hard + +"micromark-factory-space@npm:^2.0.0": + version: 2.0.0 + resolution: "micromark-factory-space@npm:2.0.0" + dependencies: + micromark-util-character: "npm:^2.0.0" + micromark-util-types: "npm:^2.0.0" + checksum: 4ffdcdc2f759887bbb356500cb460b3915ecddcb5d85c3618d7df68ad05d13ed02b1153ee1845677b7d8126df8f388288b84fcf0d943bd9c92bcc71cd7222e37 + languageName: node + linkType: hard + +"micromark-factory-title@npm:^2.0.0": + version: 2.0.0 + resolution: "micromark-factory-title@npm:2.0.0" + dependencies: + 
micromark-factory-space: "npm:^2.0.0" + micromark-util-character: "npm:^2.0.0" + micromark-util-symbol: "npm:^2.0.0" + micromark-util-types: "npm:^2.0.0" + checksum: 39e1ac23af3554e6e652e56065579bc7faf21ade7b8704b29c175871b4152b7109b790bb3cae0f7e088381139c6bac9553b8400772c3d322e4fa635f813a3578 + languageName: node + linkType: hard + +"micromark-factory-whitespace@npm:^2.0.0": + version: 2.0.0 + resolution: "micromark-factory-whitespace@npm:2.0.0" + dependencies: + micromark-factory-space: "npm:^2.0.0" + micromark-util-character: "npm:^2.0.0" + micromark-util-symbol: "npm:^2.0.0" + micromark-util-types: "npm:^2.0.0" + checksum: 9587c2546d1a58b4d5472b42adf05463f6212d0449455285662d63cd8eaed89c6b159ac82713fcee5f9dd88628c24307d9533cccd8971a2f3f4d48702f8f850a + languageName: node + linkType: hard + +"micromark-util-character@npm:^2.0.0": + version: 2.0.1 + resolution: "micromark-util-character@npm:2.0.1" + dependencies: + micromark-util-symbol: "npm:^2.0.0" + micromark-util-types: "npm:^2.0.0" + checksum: 6eb5e58c6ae5f416f71a2b777544d3118fdb04d4fd62ea27f7920d0c58fa56ddd3fe17331fbba7f0c70fa6f90bdf7910e8e951f018f0500f883369d64fd6b925 + languageName: node + linkType: hard + +"micromark-util-chunked@npm:^2.0.0": + version: 2.0.0 + resolution: "micromark-util-chunked@npm:2.0.0" + dependencies: + micromark-util-symbol: "npm:^2.0.0" + checksum: 324f95cccdae061332a8241936eaba6ef0782a1e355bac5c607ad2564fd3744929be7dc81651315a2921535747a33243e6a5606bcb64b7a56d49b6d74ea1a3d4 + languageName: node + linkType: hard + +"micromark-util-classify-character@npm:^2.0.0": + version: 2.0.0 + resolution: "micromark-util-classify-character@npm:2.0.0" + dependencies: + micromark-util-character: "npm:^2.0.0" + micromark-util-symbol: "npm:^2.0.0" + micromark-util-types: "npm:^2.0.0" + checksum: 086e52904deffebb793fb1c08c94aabb8901f76958142dfc3a6282890ebaa983b285e69bd602b9d507f1b758ed38e75a994d2ad9fbbefa7de2584f67a16af405 + languageName: node + linkType: hard + 
+"micromark-util-combine-extensions@npm:^2.0.0": + version: 2.0.0 + resolution: "micromark-util-combine-extensions@npm:2.0.0" + dependencies: + micromark-util-chunked: "npm:^2.0.0" + micromark-util-types: "npm:^2.0.0" + checksum: 107c47700343f365b4ed81551e18bc3458b573c500e56ac052b2490bd548adc475216e41d2271633a8867fac66fc22ba3e0a2d74a31ed79b9870ca947eb4e3ba + languageName: node + linkType: hard + +"micromark-util-decode-numeric-character-reference@npm:^2.0.0": + version: 2.0.1 + resolution: "micromark-util-decode-numeric-character-reference@npm:2.0.1" + dependencies: + micromark-util-symbol: "npm:^2.0.0" + checksum: 9512507722efd2033a9f08715eeef787fbfe27e23edf55db21423d46d82ab46f76c89b4f960be3f5e50a2d388d89658afc0647989cf256d051e9ea01277a1adb + languageName: node + linkType: hard + +"micromark-util-decode-string@npm:^2.0.0": + version: 2.0.0 + resolution: "micromark-util-decode-string@npm:2.0.0" + dependencies: + decode-named-character-reference: "npm:^1.0.0" + micromark-util-character: "npm:^2.0.0" + micromark-util-decode-numeric-character-reference: "npm:^2.0.0" + micromark-util-symbol: "npm:^2.0.0" + checksum: a75daf32a4a6b549e9f19b4d833ebfeb09a32a9a1f9ce50f35dec6b6a3e4f9f121f49024ba7f9c91c55ebe792f7c7a332fc9604795181b6a612637df0df5b959 + languageName: node + linkType: hard + +"micromark-util-encode@npm:^2.0.0": + version: 2.0.0 + resolution: "micromark-util-encode@npm:2.0.0" + checksum: 853a3f33fce72aaf4ffa60b7f2b6fcfca40b270b3466e1b96561b02185d2bd8c01dd7948bc31a24ac014f4cc854e545ca9a8e9cf7ea46262f9d24c9e88551c66 + languageName: node + linkType: hard + +"micromark-util-html-tag-name@npm:^2.0.0": + version: 2.0.0 + resolution: "micromark-util-html-tag-name@npm:2.0.0" + checksum: d786d4486f93eb0ac5b628779809ca97c5dc60f3c9fc03eb565809831db181cf8cb7f05f9ac76852f3eb35461af0f89fa407b46f3a03f4f97a96754d8dc540d8 + languageName: node + linkType: hard + +"micromark-util-normalize-identifier@npm:^2.0.0": + version: 2.0.0 + resolution: 
"micromark-util-normalize-identifier@npm:2.0.0" + dependencies: + micromark-util-symbol: "npm:^2.0.0" + checksum: b36da2d3fd102053dadd953ce5c558328df12a63a8ac0e5aad13d4dda8e43b6a5d4a661baafe0a1cd8a260bead4b4a8e6e0e74193dd651e8484225bd4f4e68aa + languageName: node + linkType: hard + +"micromark-util-resolve-all@npm:^2.0.0": + version: 2.0.0 + resolution: "micromark-util-resolve-all@npm:2.0.0" + dependencies: + micromark-util-types: "npm:^2.0.0" + checksum: 31fe703b85572cb3f598ebe32750e59516925c7ff1f66cfe6afaebe0771a395a9eaa770787f2523d3c46082ea80e6c14f83643303740b3d650af7c96ebd30ccc + languageName: node + linkType: hard + +"micromark-util-sanitize-uri@npm:^2.0.0": + version: 2.0.0 + resolution: "micromark-util-sanitize-uri@npm:2.0.0" + dependencies: + micromark-util-character: "npm:^2.0.0" + micromark-util-encode: "npm:^2.0.0" + micromark-util-symbol: "npm:^2.0.0" + checksum: 7d10622f5a2bb058dda6d2e95b2735c43fdf8daa4f88a0863bc90eef6598f8e10e3df98e034341fcbc090d8021c53501308c463c49d3fe91f41eb64b5bf2766e + languageName: node + linkType: hard + +"micromark-util-subtokenize@npm:^2.0.0": + version: 2.0.0 + resolution: "micromark-util-subtokenize@npm:2.0.0" + dependencies: + devlop: "npm:^1.0.0" + micromark-util-chunked: "npm:^2.0.0" + micromark-util-symbol: "npm:^2.0.0" + micromark-util-types: "npm:^2.0.0" + checksum: 4d209894f9400ff73e093a4ce3d13870cd1f546b47e50355f849c4402cecd5d2039bd63bb624f2a09aaeba01a847634088942edb42f141e4869b3a85281cf64e + languageName: node + linkType: hard + +"micromark-util-symbol@npm:^2.0.0": + version: 2.0.0 + resolution: "micromark-util-symbol@npm:2.0.0" + checksum: 8c662644c326b384f02a5269974d843d400930cf6f5d6a8e6db1743fc8933f5ecc125b4203ad4ebca25447f5d23eb7e5bf1f75af34570c3fdd925cb618752fcd + languageName: node + linkType: hard + +"micromark-util-types@npm:^2.0.0": + version: 2.0.0 + resolution: "micromark-util-types@npm:2.0.0" + checksum: 
b88e0eefd4b7c8d86b54dbf4ed0094ef56a3b0c7774d040bd5c8146b8e4e05b1026bbf1cd9308c8fcd05ecdc0784507680c8cee9888a4d3c550e6e574f7aef62 + languageName: node + linkType: hard + +"micromark@npm:^4.0.0": + version: 4.0.0 + resolution: "micromark@npm:4.0.0" + dependencies: + "@types/debug": "npm:^4.0.0" + debug: "npm:^4.0.0" + decode-named-character-reference: "npm:^1.0.0" + devlop: "npm:^1.0.0" + micromark-core-commonmark: "npm:^2.0.0" + micromark-factory-space: "npm:^2.0.0" + micromark-util-character: "npm:^2.0.0" + micromark-util-chunked: "npm:^2.0.0" + micromark-util-combine-extensions: "npm:^2.0.0" + micromark-util-decode-numeric-character-reference: "npm:^2.0.0" + micromark-util-encode: "npm:^2.0.0" + micromark-util-normalize-identifier: "npm:^2.0.0" + micromark-util-resolve-all: "npm:^2.0.0" + micromark-util-sanitize-uri: "npm:^2.0.0" + micromark-util-subtokenize: "npm:^2.0.0" + micromark-util-symbol: "npm:^2.0.0" + micromark-util-types: "npm:^2.0.0" + checksum: a697c1c0c169077f5d5def9af26985baea9d4375395dcb974a96f63761d382b455d4595a60e856c83e653b1272a732e85128d992511d6dc938d61a35bdf98c99 + languageName: node + linkType: hard + "micromark@npm:~2.11.0": version: 2.11.4 resolution: "micromark@npm:2.11.4" @@ -19559,6 +19890,15 @@ __metadata: languageName: node linkType: hard +"unist-util-stringify-position@npm:^4.0.0": + version: 4.0.0 + resolution: "unist-util-stringify-position@npm:4.0.0" + dependencies: + "@types/unist": "npm:^3.0.0" + checksum: d15c88aca7a31902d95d5b5355bbe09583cf6f6ff6e59e134ef76c76d3c30bc1021f2d7ea5b7897c6d0858ed5f3770c1b19de9c78274f50d72f95a0d05f1af71 + languageName: node + linkType: hard + "unist-util-visit-parents@npm:^2.0.0": version: 2.1.2 resolution: "unist-util-visit-parents@npm:2.1.2" From fc32c832e30c56be30664bf4c95473ec1d6bb8d0 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Mon, 30 Oct 2023 13:34:34 +0100 Subject: [PATCH 03/19] :hammer: downgrade dependencies to avoid ESM for now Also fixed a few other issues --- 
adminSiteClient/ChartEditorPage.tsx | 2 +- baker/SiteBaker.tsx | 3 +- .../dodParserTestGenerator/generate-test.ts | 37 -- devTools/dodParserTestGenerator/tsconfig.json | 8 - package.json | 4 +- .../src/MarkdownTextWrap/MarkdownTextWrap.tsx | 18 +- packages/@ourworldindata/utils/src/Util.ts | 15 + packages/@ourworldindata/utils/src/index.ts | 3 +- tsconfig.json | 57 ++- yarn.lock | 344 +----------------- 10 files changed, 80 insertions(+), 411 deletions(-) delete mode 100644 devTools/dodParserTestGenerator/generate-test.ts delete mode 100644 devTools/dodParserTestGenerator/tsconfig.json diff --git a/adminSiteClient/ChartEditorPage.tsx b/adminSiteClient/ChartEditorPage.tsx index 7d5406aecee..db273e8ad81 100644 --- a/adminSiteClient/ChartEditorPage.tsx +++ b/adminSiteClient/ChartEditorPage.tsx @@ -10,7 +10,6 @@ import { IReactionDisposer, } from "mobx" import { Prompt, Redirect } from "react-router-dom" -import { extractDetailsFromSyntax } from "@ourworldindata/components" import { Bounds, capitalize, @@ -19,6 +18,7 @@ import { get, set, groupBy, + extractDetailsFromSyntax, } from "@ourworldindata/utils" import { Grapher, Topic, GrapherInterface } from "@ourworldindata/grapher" import { Admin } from "./Admin.js" diff --git a/baker/SiteBaker.tsx b/baker/SiteBaker.tsx index d73fa5fcddb..b5cb6e0f5e1 100644 --- a/baker/SiteBaker.tsx +++ b/baker/SiteBaker.tsx @@ -45,8 +45,9 @@ import { OwidGdocPublished, clone, LinkedChart, + extractDetailsFromSyntax, } from "@ourworldindata/utils" -import { extractDetailsFromSyntax } from "@ourworldindata/components" + import { execWrapper } from "../db/execWrapper.js" import { countryProfileSpecs } from "../site/countryProfileProjects.js" import { diff --git a/devTools/dodParserTestGenerator/generate-test.ts b/devTools/dodParserTestGenerator/generate-test.ts deleted file mode 100644 index 6fc0e6e34ae..00000000000 --- a/devTools/dodParserTestGenerator/generate-test.ts +++ /dev/null @@ -1,37 +0,0 @@ -import { mdParser } from 
"@ourworldindata/components" - -import parseArgs from "minimist" -async function main(parsedArgs: parseArgs.ParsedArgs) { - const parseString = parsedArgs._[0] - const result = mdParser.markdown.parse(parseString) - const description = parsedArgs["d"] - if (parsedArgs["result-only"]) - console.log(JSON.stringify(result, undefined, 2)) - else - console.log(` - it(${description || "parses markdown correctly"}, () => { - expect(mdParser.markdown.parse("${parseString}")).toEqual( - ${JSON.stringify(result, undefined, 2)} - ) - }) - `) -} - -const parsedArgs = parseArgs(process.argv.slice(2), { - boolean: true, -}) - -if (parsedArgs["h"] || parsedArgs["help"]) { - console.log(`generate-tests.js - utility to generate tests for the DoD parser from an input text - -Usage: - dump-data.js (--result-only) (-d "Test description") '[test](hover::cat::term)' - -Options: - --result-only Only output the parse result, not the test case chrome around it - -d DESC Use the given desription for the test - `) - process.exit(0) -} else { - main(parsedArgs) -} diff --git a/devTools/dodParserTestGenerator/tsconfig.json b/devTools/dodParserTestGenerator/tsconfig.json deleted file mode 100644 index e0d38a4ff33..00000000000 --- a/devTools/dodParserTestGenerator/tsconfig.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "extends": "../tsconfigs/tsconfig.base.json", - "compilerOptions": { - "outDir": "../../itsJustJavascript/devTools/dodParserTestGenerator", - "rootDir": "." 
- }, - "references": [{ "path": "../../settings" }] -} diff --git a/package.json b/package.json index c5572c2d266..d99adf85805 100644 --- a/package.json +++ b/package.json @@ -82,7 +82,7 @@ "@types/jsonwebtoken": "^9.0.0", "@types/lodash": "^4.14.185", "@types/md5": "^2.3.2", - "@types/mdast": "^4.0.2", + "@types/mdast": "^3.0", "@types/minimist": "^1.2.2", "@types/mousetrap": "^1.6.9", "@types/mysql": "^2.15.21", @@ -155,7 +155,7 @@ "lodash": "^4.17.20", "mathjax-full": "^3.1.0", "md5": "^2.3.0", - "mdast-util-from-markdown": "^2.0.0", + "mdast-util-from-markdown": "^0.8.0", "minimist": "^1.2.6", "mobx": "^5.15.7", "mobx-formatters": "^1.0.2", diff --git a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx index 6344d299320..ec147e19cdc 100644 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx +++ b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx @@ -12,8 +12,8 @@ import { FontFamily, } from "@ourworldindata/utils" import { TextWrap } from "../TextWrap/TextWrap.js" -import { fromMarkdown } from "mdast-util-from-markdown" -import { Root, Node, RootContentMap } from "mdast" +import fromMarkdown from "mdast-util-from-markdown" +import { Root, Content } from "mdast" import { match } from "ts-pattern" const SUPERSCRIPT_NUMERALS = { @@ -749,9 +749,9 @@ function convertMarkdownRootToIRTokens(node: Root): IRToken[] { return node.children.flatMap(convertMarkdownNodeToIRTokens) } -function convertMarkdownNodeToIRTokens( - node: RootContentMap[keyof RootContentMap] -): IRToken[] { +// When using mdast types version 4 this should be typed as: +// node: RootContentMap[keyof RootContentMap] +function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { const converted = match(node) .with( { @@ -970,6 +970,14 @@ function convertMarkdownNodeToIRTokens( return [new IRText(item.value)] } ) + .with( + { + type: 
"footnote", + }, + (item) => { + return item.children.flatMap(convertMarkdownNodeToIRTokens) + } + ) .exhaustive() return converted } diff --git a/packages/@ourworldindata/utils/src/Util.ts b/packages/@ourworldindata/utils/src/Util.ts index 189eaf37e6c..ad709a3e01d 100644 --- a/packages/@ourworldindata/utils/src/Util.ts +++ b/packages/@ourworldindata/utils/src/Util.ts @@ -1810,3 +1810,18 @@ export const formatAuthors = ({ return authorsText } + +/** Works for: + * #dod:text + * #dod:text-hyphenated + * #dod:text_underscored + * #dod:text_underscored-and-hyphenated + * Duplicated in parser.ts + */ +export const detailOnDemandRegex = /#dod:([\w\-_]+)/ + +export function extractDetailsFromSyntax(str: string): string[] { + return [...str.matchAll(new RegExp(detailOnDemandRegex, "g"))].map( + ([_, term]) => term + ) +} diff --git a/packages/@ourworldindata/utils/src/index.ts b/packages/@ourworldindata/utils/src/index.ts index 8314ca62ba1..f9d131ca419 100644 --- a/packages/@ourworldindata/utils/src/index.ts +++ b/packages/@ourworldindata/utils/src/index.ts @@ -422,6 +422,8 @@ export { without, zip, lowercaseObjectKeys, + detailOnDemandRegex, + extractDetailsFromSyntax, } from "./Util.js" export { isPresent } from "./isPresent.js" @@ -621,5 +623,4 @@ export { IMAGES_DIRECTORY, gdocUrlRegex, gdocIdRegex, - detailOnDemandRegex, } from "./GdocsConstants.js" diff --git a/tsconfig.json b/tsconfig.json index 65d5abae85e..fb046e9f480 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -7,20 +7,47 @@ }, "files": [], "references": [ - { "path": "./gridLang" }, - { "path": "./gitCms" }, - { "path": "./explorer" }, - { "path": "./datapage" }, - { "path": "./site" }, - { "path": "./settings" }, - { "path": "./adminSiteClient" }, - { "path": "./adminSiteServer" }, - { "path": "./devTools/svgTester" }, - { "path": "./devTools/schema" }, - { "path": "./devTools/schemaProcessor" }, - { "path": "./devTools/dodParserTestGenerator" }, - { "path": "./devTools/uploadWordpressImagesToObjStorage" 
}, - { "path": "./devTools/explorerTools" }, - { "path": "./devTools/regionsUpdater" } + { + "path": "./gridLang" + }, + { + "path": "./gitCms" + }, + { + "path": "./explorer" + }, + { + "path": "./datapage" + }, + { + "path": "./site" + }, + { + "path": "./settings" + }, + { + "path": "./adminSiteClient" + }, + { + "path": "./adminSiteServer" + }, + { + "path": "./devTools/svgTester" + }, + { + "path": "./devTools/schema" + }, + { + "path": "./devTools/schemaProcessor" + }, + { + "path": "./devTools/uploadWordpressImagesToObjStorage" + }, + { + "path": "./devTools/explorerTools" + }, + { + "path": "./devTools/regionsUpdater" + } ] } diff --git a/yarn.lock b/yarn.lock index 5ac3cd34531..2ae9655b992 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4163,15 +4163,6 @@ __metadata: languageName: node linkType: hard -"@types/debug@npm:^4.0.0": - version: 4.1.10 - resolution: "@types/debug@npm:4.1.10" - dependencies: - "@types/ms": "npm:*" - checksum: 938f79c5b610f851da9c67ecd8641a09b33ce9cb38fe4c9f4d20ee743d6bccb5d8e9a833a4cd23e0684a316622af67a0634fa706baea5a01f5219961d1976314 - languageName: node - linkType: hard - "@types/dom-speech-recognition@npm:^0.0.1": version: 0.0.1 resolution: "@types/dom-speech-recognition@npm:0.0.1" @@ -4431,7 +4422,7 @@ __metadata: languageName: node linkType: hard -"@types/mdast@npm:^3.0.0": +"@types/mdast@npm:^3.0, @types/mdast@npm:^3.0.0": version: 3.0.14 resolution: "@types/mdast@npm:3.0.14" dependencies: @@ -4440,15 +4431,6 @@ __metadata: languageName: node linkType: hard -"@types/mdast@npm:^4.0.0, @types/mdast@npm:^4.0.2": - version: 4.0.2 - resolution: "@types/mdast@npm:4.0.2" - dependencies: - "@types/unist": "npm:*" - checksum: cb2915c71eb94319880a0185459cac89b73f14f903998e1b088055cc51b71de91d7c76bda46c584e5d9c2d21d817680a3b2de56ebb737c14c641f81ad30028ed - languageName: node - linkType: hard - "@types/mime@npm:^1": version: 1.3.2 resolution: "@types/mime@npm:1.3.2" @@ -4477,13 +4459,6 @@ __metadata: languageName: node linkType: hard 
-"@types/ms@npm:*": - version: 0.7.33 - resolution: "@types/ms@npm:0.7.33" - checksum: 2cb5af611ace05ab2ae40422c8539850cf983197982bb04b83acf59e6e692e2faccf336a82ac4db97f7ea28f2baa0a8990fa5eb1cd72c5cab17b5b3609b0b650 - languageName: node - linkType: hard - "@types/mysql@npm:^2.15.21": version: 2.15.21 resolution: "@types/mysql@npm:2.15.21" @@ -4851,13 +4826,6 @@ __metadata: languageName: node linkType: hard -"@types/unist@npm:*, @types/unist@npm:^3.0.0": - version: 3.0.1 - resolution: "@types/unist@npm:3.0.1" - checksum: 5692b9b271724d4ab6434f125350436bb35eb8ffdaeb5b762c7282870be9562f364bc49016e4b25afb442b68ad9ee042a262c093af07738af6b8a9b858335e37 - languageName: node - linkType: hard - "@types/unist@npm:^2, @types/unist@npm:^2.0.0, @types/unist@npm:^2.0.2, @types/unist@npm:^2.0.3": version: 2.0.9 resolution: "@types/unist@npm:2.0.9" @@ -6510,13 +6478,6 @@ __metadata: languageName: node linkType: hard -"character-entities@npm:^2.0.0": - version: 2.0.2 - resolution: "character-entities@npm:2.0.2" - checksum: c8dd1f4bf1a92fccf7d2fad9673660a88b37854557d30f6076c32fedfb92d1420208298829ff1d3b6b4fa1c7012e8326c45e7f5c3ed1e9a09ec177593c521b2f - languageName: node - linkType: hard - "character-reference-invalid@npm:^1.0.0": version: 1.1.4 resolution: "character-reference-invalid@npm:1.1.4" @@ -7928,15 +7889,6 @@ __metadata: languageName: node linkType: hard -"decode-named-character-reference@npm:^1.0.0": - version: 1.0.2 - resolution: "decode-named-character-reference@npm:1.0.2" - dependencies: - character-entities: "npm:^2.0.0" - checksum: f4c71d3b93105f20076052f9cb1523a22a9c796b8296cd35eef1ca54239c78d182c136a848b83ff8da2071e3ae2b1d300bf29d00650a6d6e675438cc31b11d78 - languageName: node - linkType: hard - "decode-uri-component@npm:^0.2.0": version: 0.2.2 resolution: "decode-uri-component@npm:0.2.2" @@ -8101,13 +8053,6 @@ __metadata: languageName: node linkType: hard -"dequal@npm:^2.0.0": - version: 2.0.3 - resolution: "dequal@npm:2.0.3" - checksum: 
6ff05a7561f33603df87c45e389c9ac0a95e3c056be3da1a0c4702149e3a7f6fe5ffbb294478687ba51a9e95f3a60e8b6b9005993acd79c292c7d15f71964b6b - languageName: node - linkType: hard - "destroy@npm:1.2.0": version: 1.2.0 resolution: "destroy@npm:1.2.0" @@ -8143,15 +8088,6 @@ __metadata: languageName: node linkType: hard -"devlop@npm:^1.0.0": - version: 1.1.0 - resolution: "devlop@npm:1.1.0" - dependencies: - dequal: "npm:^2.0.0" - checksum: 3cc5f903d02d279d6dc4aa71ab6ed9898b9f4d1f861cc5421ce7357893c21b9520de78afb203c92bd650a6977ad0ca98195453a0707a39958cf5fea3b0a8ddd8 - languageName: node - linkType: hard - "diff-sequences@npm:^29.6.3": version: 29.6.3 resolution: "diff-sequences@npm:29.6.3" @@ -10497,7 +10433,7 @@ __metadata: "@types/jsonwebtoken": "npm:^9.0.0" "@types/lodash": "npm:^4.14.185" "@types/md5": "npm:^2.3.2" - "@types/mdast": "npm:^4.0.2" + "@types/mdast": "npm:^3.0" "@types/minimist": "npm:^1.2.2" "@types/mousetrap": "npm:^1.6.9" "@types/mysql": "npm:^2.15.21" @@ -10582,7 +10518,7 @@ __metadata: lodash: "npm:^4.17.20" mathjax-full: "npm:^3.1.0" md5: "npm:^2.3.0" - mdast-util-from-markdown: "npm:^2.0.0" + mdast-util-from-markdown: "npm:^0.8.0" minimist: "npm:^1.2.6" mobx: "npm:^5.15.7" mobx-formatters: "npm:^1.0.2" @@ -13674,26 +13610,6 @@ __metadata: languageName: node linkType: hard -"mdast-util-from-markdown@npm:^2.0.0": - version: 2.0.0 - resolution: "mdast-util-from-markdown@npm:2.0.0" - dependencies: - "@types/mdast": "npm:^4.0.0" - "@types/unist": "npm:^3.0.0" - decode-named-character-reference: "npm:^1.0.0" - devlop: "npm:^1.0.0" - mdast-util-to-string: "npm:^4.0.0" - micromark: "npm:^4.0.0" - micromark-util-decode-numeric-character-reference: "npm:^2.0.0" - micromark-util-decode-string: "npm:^2.0.0" - micromark-util-normalize-identifier: "npm:^2.0.0" - micromark-util-symbol: "npm:^2.0.0" - micromark-util-types: "npm:^2.0.0" - unist-util-stringify-position: "npm:^4.0.0" - checksum: 
960e28a8ff3d989cc25a615d14e9a1d95d145b938dc08323ce44689be6dd052ece544d2acf5242cedb8ad6ccdc3ffe854989b7c2516c6e62f2fca42b6d11a2da - languageName: node - linkType: hard - "mdast-util-to-hast@npm:^10.2.0": version: 10.2.0 resolution: "mdast-util-to-hast@npm:10.2.0" @@ -13717,15 +13633,6 @@ __metadata: languageName: node linkType: hard -"mdast-util-to-string@npm:^4.0.0": - version: 4.0.0 - resolution: "mdast-util-to-string@npm:4.0.0" - dependencies: - "@types/mdast": "npm:^4.0.0" - checksum: f4a5dbb9ea03521d7d3e26a9ba5652a1d6fbd55706dddd2155427517085688830e0ecd3f12418cfd40892640886eb39a4034c3c967d85e01e2fa64cfb53cff05 - languageName: node - linkType: hard - "mdn-data@npm:2.0.28": version: 2.0.28 resolution: "mdn-data@npm:2.0.28" @@ -13808,242 +13715,6 @@ __metadata: languageName: node linkType: hard -"micromark-core-commonmark@npm:^2.0.0": - version: 2.0.0 - resolution: "micromark-core-commonmark@npm:2.0.0" - dependencies: - decode-named-character-reference: "npm:^1.0.0" - devlop: "npm:^1.0.0" - micromark-factory-destination: "npm:^2.0.0" - micromark-factory-label: "npm:^2.0.0" - micromark-factory-space: "npm:^2.0.0" - micromark-factory-title: "npm:^2.0.0" - micromark-factory-whitespace: "npm:^2.0.0" - micromark-util-character: "npm:^2.0.0" - micromark-util-chunked: "npm:^2.0.0" - micromark-util-classify-character: "npm:^2.0.0" - micromark-util-html-tag-name: "npm:^2.0.0" - micromark-util-normalize-identifier: "npm:^2.0.0" - micromark-util-resolve-all: "npm:^2.0.0" - micromark-util-subtokenize: "npm:^2.0.0" - micromark-util-symbol: "npm:^2.0.0" - micromark-util-types: "npm:^2.0.0" - checksum: 67f6e2f062f42a7ae21e8a409f3663843703a830ff27cf0f41cb0fb712c58e55409db428531d8124c4ef8d698cd81e7eb41485d24b8c352d2f0c06b535865367 - languageName: node - linkType: hard - -"micromark-factory-destination@npm:^2.0.0": - version: 2.0.0 - resolution: "micromark-factory-destination@npm:2.0.0" - dependencies: - micromark-util-character: "npm:^2.0.0" - micromark-util-symbol: "npm:^2.0.0" - 
micromark-util-types: "npm:^2.0.0" - checksum: d36e65ed1c072ff4148b016783148ba7c68a078991154625723e24bda3945160268fb91079fb28618e1613c2b6e70390a8ddc544c45410288aa27b413593071a - languageName: node - linkType: hard - -"micromark-factory-label@npm:^2.0.0": - version: 2.0.0 - resolution: "micromark-factory-label@npm:2.0.0" - dependencies: - devlop: "npm:^1.0.0" - micromark-util-character: "npm:^2.0.0" - micromark-util-symbol: "npm:^2.0.0" - micromark-util-types: "npm:^2.0.0" - checksum: c021dbd0ed367610d35f2bae21209bc804d1a6d1286ffce458fd6a717f4d7fe581a7cba7d5c2d7a63757c44eb927c80d6a571d6ea7969fae1b48ab6461d109c4 - languageName: node - linkType: hard - -"micromark-factory-space@npm:^2.0.0": - version: 2.0.0 - resolution: "micromark-factory-space@npm:2.0.0" - dependencies: - micromark-util-character: "npm:^2.0.0" - micromark-util-types: "npm:^2.0.0" - checksum: 4ffdcdc2f759887bbb356500cb460b3915ecddcb5d85c3618d7df68ad05d13ed02b1153ee1845677b7d8126df8f388288b84fcf0d943bd9c92bcc71cd7222e37 - languageName: node - linkType: hard - -"micromark-factory-title@npm:^2.0.0": - version: 2.0.0 - resolution: "micromark-factory-title@npm:2.0.0" - dependencies: - micromark-factory-space: "npm:^2.0.0" - micromark-util-character: "npm:^2.0.0" - micromark-util-symbol: "npm:^2.0.0" - micromark-util-types: "npm:^2.0.0" - checksum: 39e1ac23af3554e6e652e56065579bc7faf21ade7b8704b29c175871b4152b7109b790bb3cae0f7e088381139c6bac9553b8400772c3d322e4fa635f813a3578 - languageName: node - linkType: hard - -"micromark-factory-whitespace@npm:^2.0.0": - version: 2.0.0 - resolution: "micromark-factory-whitespace@npm:2.0.0" - dependencies: - micromark-factory-space: "npm:^2.0.0" - micromark-util-character: "npm:^2.0.0" - micromark-util-symbol: "npm:^2.0.0" - micromark-util-types: "npm:^2.0.0" - checksum: 9587c2546d1a58b4d5472b42adf05463f6212d0449455285662d63cd8eaed89c6b159ac82713fcee5f9dd88628c24307d9533cccd8971a2f3f4d48702f8f850a - languageName: node - linkType: hard - 
-"micromark-util-character@npm:^2.0.0": - version: 2.0.1 - resolution: "micromark-util-character@npm:2.0.1" - dependencies: - micromark-util-symbol: "npm:^2.0.0" - micromark-util-types: "npm:^2.0.0" - checksum: 6eb5e58c6ae5f416f71a2b777544d3118fdb04d4fd62ea27f7920d0c58fa56ddd3fe17331fbba7f0c70fa6f90bdf7910e8e951f018f0500f883369d64fd6b925 - languageName: node - linkType: hard - -"micromark-util-chunked@npm:^2.0.0": - version: 2.0.0 - resolution: "micromark-util-chunked@npm:2.0.0" - dependencies: - micromark-util-symbol: "npm:^2.0.0" - checksum: 324f95cccdae061332a8241936eaba6ef0782a1e355bac5c607ad2564fd3744929be7dc81651315a2921535747a33243e6a5606bcb64b7a56d49b6d74ea1a3d4 - languageName: node - linkType: hard - -"micromark-util-classify-character@npm:^2.0.0": - version: 2.0.0 - resolution: "micromark-util-classify-character@npm:2.0.0" - dependencies: - micromark-util-character: "npm:^2.0.0" - micromark-util-symbol: "npm:^2.0.0" - micromark-util-types: "npm:^2.0.0" - checksum: 086e52904deffebb793fb1c08c94aabb8901f76958142dfc3a6282890ebaa983b285e69bd602b9d507f1b758ed38e75a994d2ad9fbbefa7de2584f67a16af405 - languageName: node - linkType: hard - -"micromark-util-combine-extensions@npm:^2.0.0": - version: 2.0.0 - resolution: "micromark-util-combine-extensions@npm:2.0.0" - dependencies: - micromark-util-chunked: "npm:^2.0.0" - micromark-util-types: "npm:^2.0.0" - checksum: 107c47700343f365b4ed81551e18bc3458b573c500e56ac052b2490bd548adc475216e41d2271633a8867fac66fc22ba3e0a2d74a31ed79b9870ca947eb4e3ba - languageName: node - linkType: hard - -"micromark-util-decode-numeric-character-reference@npm:^2.0.0": - version: 2.0.1 - resolution: "micromark-util-decode-numeric-character-reference@npm:2.0.1" - dependencies: - micromark-util-symbol: "npm:^2.0.0" - checksum: 9512507722efd2033a9f08715eeef787fbfe27e23edf55db21423d46d82ab46f76c89b4f960be3f5e50a2d388d89658afc0647989cf256d051e9ea01277a1adb - languageName: node - linkType: hard - -"micromark-util-decode-string@npm:^2.0.0": - 
version: 2.0.0 - resolution: "micromark-util-decode-string@npm:2.0.0" - dependencies: - decode-named-character-reference: "npm:^1.0.0" - micromark-util-character: "npm:^2.0.0" - micromark-util-decode-numeric-character-reference: "npm:^2.0.0" - micromark-util-symbol: "npm:^2.0.0" - checksum: a75daf32a4a6b549e9f19b4d833ebfeb09a32a9a1f9ce50f35dec6b6a3e4f9f121f49024ba7f9c91c55ebe792f7c7a332fc9604795181b6a612637df0df5b959 - languageName: node - linkType: hard - -"micromark-util-encode@npm:^2.0.0": - version: 2.0.0 - resolution: "micromark-util-encode@npm:2.0.0" - checksum: 853a3f33fce72aaf4ffa60b7f2b6fcfca40b270b3466e1b96561b02185d2bd8c01dd7948bc31a24ac014f4cc854e545ca9a8e9cf7ea46262f9d24c9e88551c66 - languageName: node - linkType: hard - -"micromark-util-html-tag-name@npm:^2.0.0": - version: 2.0.0 - resolution: "micromark-util-html-tag-name@npm:2.0.0" - checksum: d786d4486f93eb0ac5b628779809ca97c5dc60f3c9fc03eb565809831db181cf8cb7f05f9ac76852f3eb35461af0f89fa407b46f3a03f4f97a96754d8dc540d8 - languageName: node - linkType: hard - -"micromark-util-normalize-identifier@npm:^2.0.0": - version: 2.0.0 - resolution: "micromark-util-normalize-identifier@npm:2.0.0" - dependencies: - micromark-util-symbol: "npm:^2.0.0" - checksum: b36da2d3fd102053dadd953ce5c558328df12a63a8ac0e5aad13d4dda8e43b6a5d4a661baafe0a1cd8a260bead4b4a8e6e0e74193dd651e8484225bd4f4e68aa - languageName: node - linkType: hard - -"micromark-util-resolve-all@npm:^2.0.0": - version: 2.0.0 - resolution: "micromark-util-resolve-all@npm:2.0.0" - dependencies: - micromark-util-types: "npm:^2.0.0" - checksum: 31fe703b85572cb3f598ebe32750e59516925c7ff1f66cfe6afaebe0771a395a9eaa770787f2523d3c46082ea80e6c14f83643303740b3d650af7c96ebd30ccc - languageName: node - linkType: hard - -"micromark-util-sanitize-uri@npm:^2.0.0": - version: 2.0.0 - resolution: "micromark-util-sanitize-uri@npm:2.0.0" - dependencies: - micromark-util-character: "npm:^2.0.0" - micromark-util-encode: "npm:^2.0.0" - micromark-util-symbol: "npm:^2.0.0" 
- checksum: 7d10622f5a2bb058dda6d2e95b2735c43fdf8daa4f88a0863bc90eef6598f8e10e3df98e034341fcbc090d8021c53501308c463c49d3fe91f41eb64b5bf2766e - languageName: node - linkType: hard - -"micromark-util-subtokenize@npm:^2.0.0": - version: 2.0.0 - resolution: "micromark-util-subtokenize@npm:2.0.0" - dependencies: - devlop: "npm:^1.0.0" - micromark-util-chunked: "npm:^2.0.0" - micromark-util-symbol: "npm:^2.0.0" - micromark-util-types: "npm:^2.0.0" - checksum: 4d209894f9400ff73e093a4ce3d13870cd1f546b47e50355f849c4402cecd5d2039bd63bb624f2a09aaeba01a847634088942edb42f141e4869b3a85281cf64e - languageName: node - linkType: hard - -"micromark-util-symbol@npm:^2.0.0": - version: 2.0.0 - resolution: "micromark-util-symbol@npm:2.0.0" - checksum: 8c662644c326b384f02a5269974d843d400930cf6f5d6a8e6db1743fc8933f5ecc125b4203ad4ebca25447f5d23eb7e5bf1f75af34570c3fdd925cb618752fcd - languageName: node - linkType: hard - -"micromark-util-types@npm:^2.0.0": - version: 2.0.0 - resolution: "micromark-util-types@npm:2.0.0" - checksum: b88e0eefd4b7c8d86b54dbf4ed0094ef56a3b0c7774d040bd5c8146b8e4e05b1026bbf1cd9308c8fcd05ecdc0784507680c8cee9888a4d3c550e6e574f7aef62 - languageName: node - linkType: hard - -"micromark@npm:^4.0.0": - version: 4.0.0 - resolution: "micromark@npm:4.0.0" - dependencies: - "@types/debug": "npm:^4.0.0" - debug: "npm:^4.0.0" - decode-named-character-reference: "npm:^1.0.0" - devlop: "npm:^1.0.0" - micromark-core-commonmark: "npm:^2.0.0" - micromark-factory-space: "npm:^2.0.0" - micromark-util-character: "npm:^2.0.0" - micromark-util-chunked: "npm:^2.0.0" - micromark-util-combine-extensions: "npm:^2.0.0" - micromark-util-decode-numeric-character-reference: "npm:^2.0.0" - micromark-util-encode: "npm:^2.0.0" - micromark-util-normalize-identifier: "npm:^2.0.0" - micromark-util-resolve-all: "npm:^2.0.0" - micromark-util-sanitize-uri: "npm:^2.0.0" - micromark-util-subtokenize: "npm:^2.0.0" - micromark-util-symbol: "npm:^2.0.0" - micromark-util-types: "npm:^2.0.0" - checksum: 
a697c1c0c169077f5d5def9af26985baea9d4375395dcb974a96f63761d382b455d4595a60e856c83e653b1272a732e85128d992511d6dc938d61a35bdf98c99 - languageName: node - linkType: hard - "micromark@npm:~2.11.0": version: 2.11.4 resolution: "micromark@npm:2.11.4" @@ -19890,15 +19561,6 @@ __metadata: languageName: node linkType: hard -"unist-util-stringify-position@npm:^4.0.0": - version: 4.0.0 - resolution: "unist-util-stringify-position@npm:4.0.0" - dependencies: - "@types/unist": "npm:^3.0.0" - checksum: d15c88aca7a31902d95d5b5355bbe09583cf6f6ff6e59e134ef76c76d3c30bc1021f2d7ea5b7897c6d0858ed5f3770c1b19de9c78274f50d72f95a0d05f1af71 - languageName: node - linkType: hard - "unist-util-visit-parents@npm:^2.0.0": version: 2.1.2 resolution: "unist-util-visit-parents@npm:2.1.2" From a285df56235c837febebd28c83fed891b72a0e53 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Wed, 1 Nov 2023 13:02:31 +0100 Subject: [PATCH 04/19] :honeybee: remove dead code --- .../src/MarkdownTextWrap/MarkdownTextWrap.tsx | 57 ------------------- 1 file changed, 57 deletions(-) diff --git a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx index ec147e19cdc..73644934ace 100644 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx +++ b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx @@ -495,63 +495,6 @@ export const sumTextWrapHeights = ( sum(elements.map((element) => element.height)) + (elements.length - 1) * spacer -// export function parsimmonToTextTokens( -// nodes: EveryMarkdownChildNode[], -// fontParams?: IRFontParams -// ): IRToken[] { -// return nodes.map((node): IRToken => { -// if (node.type === "text") { -// return new IRText(node.value, fontParams) -// } else if (node.type === "newline") { -// return new IRLineBreak() -// } else if (node.type === "whitespace") { -// return new IRWhitespace(fontParams) -// } else if ( -// 
node.type === "bold" || -// node.type === "plainBold" || -// node.type === "boldWithoutItalic" -// ) { -// return new IRBold( -// parsimmonToTextTokens(node.children, { -// ...fontParams, -// fontWeight: 700, -// }) -// ) -// } else if ( -// node.type === "italic" || -// node.type === "plainItalic" || -// node.type === "italicWithoutBold" -// ) { -// return new IRItalic( -// parsimmonToTextTokens(node.children, { -// ...fontParams, -// isItalic: true, -// }) -// ) -// } else if (node.type === "plainUrl") { -// return new IRLink( -// node.href, -// parsimmonToTextTokens( -// [{ type: "text", value: node.href }], -// fontParams -// ) -// ) -// } else if (node.type === "markdownLink") { -// return new IRLink( -// node.href, -// parsimmonToTextTokens(node.children, fontParams) -// ) -// } else if (node.type === "detailOnDemand") { -// return new IRDetailOnDemand( -// node.term, -// parsimmonToTextTokens(node.children, fontParams) -// ) -// } else { -// throw new Error(`Unknown node type: ${(node as any).type}`) -// } -// }) -// } - type MarkdownTextWrapProps = { text: string fontSize: number From a45cd7123a297750d73b28b5287b50f1f0e7940c Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Wed, 1 Nov 2023 14:32:58 +0100 Subject: [PATCH 05/19] :bug: fix font params not being passed through --- .vscode/launch.json | 30 +++-- .../MarkdownTextWrap/MarkdownTextWrap.test.ts | 4 +- .../src/MarkdownTextWrap/MarkdownTextWrap.tsx | 117 +++++++++++++----- 3 files changed, 109 insertions(+), 42 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 9cb3e25081e..88e0a027bea 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -5,12 +5,16 @@ "version": "0.2.0", "configurations": [ { - "args": [], + "args": [ + "Markdown" + ], "internalConsoleOptions": "openOnSessionStart", "name": "Jest Tests", "program": "${workspaceFolder}/node_modules/jest/bin/jest.js", "request": "launch", - "skipFiles": ["/**"], + "skipFiles": [ + "/**" + ], "type": "node" }, { @@ 
-19,7 +23,10 @@ "request": "launch", "name": "Jest Test current file", "program": "${workspaceFolder}/node_modules/.bin/jest", - "args": ["${fileBasenameNoExtension}.js", "--watch"], + "args": [ + "${fileBasenameNoExtension}.js", + "--watch" + ], "console": "integratedTerminal" // "internalConsoleOptions": "neverOpen" }, @@ -53,23 +60,32 @@ "name": "Run migrate WP to ArchieML", "program": "${workspaceFolder}/itsJustJavascript/db/migrateWpPostsToArchieMl.js", "request": "launch", - "skipFiles": ["/**"], + "skipFiles": [ + "/**" + ], "type": "node" }, { "name": "Sync WP posts to grapher", "program": "${workspaceFolder}/itsJustJavascript/db/syncPostsToGrapher.js", "request": "launch", - "skipFiles": ["/**"], + "skipFiles": [ + "/**" + ], "type": "node" }, { "name": "Run SVGTester", "program": "${workspaceFolder}/itsJustJavascript/devTools/svgTester/verify-graphs.js", "request": "launch", - "skipFiles": ["/**"], + "skipFiles": [ + "/**" + ], "type": "node", - "args": ["-g", "367"] + "args": [ + "-g", + "367" + ] }, { "name": "Launch admin server", diff --git a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.test.ts b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.test.ts index 0dd456d5c83..052853722d7 100644 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.test.ts +++ b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.test.ts @@ -63,7 +63,7 @@ describe("MarkdownTextWrap", () => { it("should calculate height correctly", () => { const element = new MarkdownTextWrap({ - text: "a\nb\nc", + text: "a \nb \nc", fontSize: 10, lineHeight: 1.5, }) @@ -83,7 +83,7 @@ describe("MarkdownTextWrap", () => { it("should split on newline", () => { const element = new MarkdownTextWrap({ - text: "_test\n**\nnewline\n**_test", + text: "_test \n** \nnewline \n**_test", fontSize: 10, lineHeight: 1, }) diff --git 
a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx index 73644934ace..8f61bdb3327 100644 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx +++ b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx @@ -535,7 +535,7 @@ export class MarkdownTextWrap extends React.Component { } @computed get tokensFromMarkdown(): IRToken[] { - const tokens = convertMarkdownToIRTokens(this.text) + const tokens = convertMarkdownToIRTokens(this.text, this.fontParams) return tokens } @@ -683,18 +683,29 @@ function MarkdownTextWrapLine({ line }: { line: IRToken[] }): JSX.Element { ) } -export function convertMarkdownToIRTokens(markdown: string): IRToken[] { +export function convertMarkdownToIRTokens( + markdown: string, + fontParams?: IRFontParams +): IRToken[] { const ast = fromMarkdown(markdown) - return convertMarkdownRootToIRTokens(ast) + return convertMarkdownRootToIRTokens(ast, fontParams) } -function convertMarkdownRootToIRTokens(node: Root): IRToken[] { - return node.children.flatMap(convertMarkdownNodeToIRTokens) +function convertMarkdownRootToIRTokens( + node: Root, + fontParams?: IRFontParams +): IRToken[] { + return node.children.flatMap((item) => + convertMarkdownNodeToIRTokens(item, fontParams) + ) } // When using mdast types version 4 this should be typed as: // node: RootContentMap[keyof RootContentMap] -function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { +function convertMarkdownNodeToIRTokens( + node: Content, + fontParams: IRFontParams = {} +): IRToken[] { const converted = match(node) .with( { @@ -702,7 +713,7 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { }, (item) => { return item.children.flatMap((child) => - convertMarkdownNodeToIRTokens(child) + convertMarkdownNodeToIRTokens(child, fontParams) ) } ) @@ -710,7 +721,7 @@ function convertMarkdownNodeToIRTokens(node: 
Content): IRToken[] { { type: "break", }, - (item) => { + (_) => { return [new IRLineBreak()] } ) @@ -719,7 +730,7 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { type: "code", }, (item) => { - return [new IRText(item.value)] + return [new IRText(item.value, fontParams)] } ) .with( @@ -729,7 +740,12 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { (item) => { return [ new IRItalic( - item.children.flatMap(convertMarkdownNodeToIRTokens) + item.children.flatMap((child) => + convertMarkdownNodeToIRTokens(child, { + ...fontParams, + isItalic: true, + }) + ) ), ] } @@ -739,7 +755,9 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { type: "heading", }, (item) => { - return item.children.flatMap(convertMarkdownNodeToIRTokens) + return item.children.flatMap((child) => + convertMarkdownNodeToIRTokens(child, fontParams) + ) } ) .with( @@ -747,7 +765,7 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { type: "html", }, (item) => { - return [new IRText(item.value)] + return [new IRText(item.value, fontParams)] } ) .with( @@ -755,7 +773,7 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { type: "image", }, (item) => { - return [new IRText(item.alt ?? "")] + return [new IRText(item.alt ?? 
"", fontParams)] } ) .with( @@ -763,7 +781,7 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { type: "inlineCode", }, (item) => { - return [new IRText(item.value)] + return [new IRText(item.value, fontParams)] } ) .with( @@ -774,7 +792,9 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { return [ new IRLink( item.url, - item.children.flatMap(convertMarkdownNodeToIRTokens) + item.children.flatMap((child) => + convertMarkdownNodeToIRTokens(child, fontParams) + ) ), ] } @@ -784,7 +804,9 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { type: "list", }, (item) => { - return item.children.flatMap(convertMarkdownNodeToIRTokens) + return item.children.flatMap((child) => + convertMarkdownNodeToIRTokens(child, fontParams) + ) } ) .with( @@ -792,7 +814,9 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { type: "listItem", }, (item) => { - return item.children.flatMap(convertMarkdownNodeToIRTokens) + return item.children.flatMap((child) => + convertMarkdownNodeToIRTokens(child, fontParams) + ) } ) .with( @@ -800,7 +824,9 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { type: "paragraph", }, (item) => { - return item.children.flatMap(convertMarkdownNodeToIRTokens) + return item.children.flatMap((child) => + convertMarkdownNodeToIRTokens(child, fontParams) + ) } ) .with( @@ -810,7 +836,12 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { (item) => { return [ new IRBold( - item.children.flatMap(convertMarkdownNodeToIRTokens) + item.children.flatMap((child) => + convertMarkdownNodeToIRTokens(child, { + ...fontParams, + fontWeight: 700, + }) + ) ), ] } @@ -820,15 +851,15 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { type: "text", }, (item) => { - return [new IRText(item.value)] + return [new IRText(item.value, fontParams)] } ) .with( { type: "thematicBreak", }, - (item) => { - return [new IRText("---")] + (_) => { + return [new 
IRText("---", fontParams)] } ) .with( @@ -836,7 +867,9 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { type: "delete", }, (item) => { - return item.children.flatMap(convertMarkdownNodeToIRTokens) + return item.children.flatMap((child) => + convertMarkdownNodeToIRTokens(child, fontParams) + ) } ) // Now lets finish this with blocks for FootnoteDefinition, Definition, ImageReference, LinkReference, FootnoteReference, and Table @@ -845,7 +878,9 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { type: "footnoteDefinition", }, (item) => { - return item.children.flatMap(convertMarkdownNodeToIRTokens) + return item.children.flatMap((child) => + convertMarkdownNodeToIRTokens(child, fontParams) + ) } ) .with( @@ -853,7 +888,9 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { type: "definition", }, (item) => { - return [new IRText(`${item.identifier}: ${item.label}`)] + return [ + new IRText(`${item.identifier}: ${item.label}`, fontParams), + ] } ) .with( @@ -861,7 +898,9 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { type: "imageReference", }, (item) => { - return [new IRText(`${item.identifier}: ${item.label}`)] + return [ + new IRText(`${item.identifier}: ${item.label}`, fontParams), + ] } ) .with( @@ -869,7 +908,9 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { type: "linkReference", }, (item) => { - return [new IRText(`${item.identifier}: ${item.label}`)] + return [ + new IRText(`${item.identifier}: ${item.label}`, fontParams), + ] } ) .with( @@ -877,7 +918,9 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { type: "footnoteReference", }, (item) => { - return [new IRText(`${item.identifier}: ${item.label}`)] + return [ + new IRText(`${item.identifier}: ${item.label}`, fontParams), + ] } ) .with( @@ -885,7 +928,9 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { type: "table", }, (item) => { - return 
item.children.flatMap(convertMarkdownNodeToIRTokens) + return item.children.flatMap((child) => + convertMarkdownNodeToIRTokens(child, fontParams) + ) } ) .with( @@ -893,7 +938,9 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { type: "tableCell", }, (item) => { - return item.children.flatMap(convertMarkdownNodeToIRTokens) + return item.children.flatMap((child) => + convertMarkdownNodeToIRTokens(child, fontParams) + ) } ) // and now TableRow and Yaml @@ -902,7 +949,9 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { type: "tableRow", }, (item) => { - return item.children.flatMap(convertMarkdownNodeToIRTokens) + return item.children.flatMap((child) => + convertMarkdownNodeToIRTokens(child, fontParams) + ) } ) .with( @@ -910,7 +959,7 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { type: "yaml", }, (item) => { - return [new IRText(item.value)] + return [new IRText(item.value, fontParams)] } ) .with( @@ -918,7 +967,9 @@ function convertMarkdownNodeToIRTokens(node: Content): IRToken[] { type: "footnote", }, (item) => { - return item.children.flatMap(convertMarkdownNodeToIRTokens) + return item.children.flatMap((child) => + convertMarkdownNodeToIRTokens(child, fontParams) + ) } ) .exhaustive() From 199c51cc7dea948e141ce6ec37e5631d252bfc07 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Wed, 1 Nov 2023 15:34:52 +0100 Subject: [PATCH 06/19] :honeybee: remove unnecessary import --- packages/@ourworldindata/components/src/SimpleMarkdownText.tsx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/@ourworldindata/components/src/SimpleMarkdownText.tsx b/packages/@ourworldindata/components/src/SimpleMarkdownText.tsx index 618bc675729..2a29c9fdf10 100644 --- a/packages/@ourworldindata/components/src/SimpleMarkdownText.tsx +++ b/packages/@ourworldindata/components/src/SimpleMarkdownText.tsx @@ -1,6 +1,8 @@ import React from "react" import { computed } from "mobx" import { Remark } from "react-remark" +import 
visit from "unist-util-visit" + type SimpleMarkdownTextProps = { text: string } From 03cb3eb6f9673a3c20aaa0065be5b2de0cf6c80f Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Wed, 1 Nov 2023 16:03:25 +0100 Subject: [PATCH 07/19] :hammer: fix import issue, broken line breaks --- .../src/MarkdownTextWrap/MarkdownTextWrap.tsx | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx index 8f61bdb3327..e09d88dcde3 100644 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx +++ b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx @@ -13,7 +13,7 @@ import { } from "@ourworldindata/utils" import { TextWrap } from "../TextWrap/TextWrap.js" import fromMarkdown from "mdast-util-from-markdown" -import { Root, Content } from "mdast" +import type { Root, Content } from "mdast" import { match } from "ts-pattern" const SUPERSCRIPT_NUMERALS = { @@ -851,7 +851,16 @@ function convertMarkdownNodeToIRTokens( type: "text", }, (item) => { - return [new IRText(item.value, fontParams)] + const splitted = item.value.split(/(\s+)/) + const tokens = splitted.flatMap((text, i) => { + if (i < splitted.length - 1) { + return [ + new IRText(text, fontParams), + new IRWhitespace(fontParams), + ] + } else return [new IRText(text, fontParams)] + }) + return tokens } ) .with( From aa1c338236665f2f954c565eb3e04d0bc30f3e75 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Wed, 1 Nov 2023 16:12:19 +0100 Subject: [PATCH 08/19] :honeybee: remove launch.json arg --- .vscode/launch.json | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 88e0a027bea..4ef08867736 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -5,9 +5,7 @@ "version": "0.2.0", "configurations": [ { - "args": [ - "Markdown" - 
], + "args": [], "internalConsoleOptions": "openOnSessionStart", "name": "Jest Tests", "program": "${workspaceFolder}/node_modules/jest/bin/jest.js", From d6ed296cc8db71a699fdf49f356c2409f02c92f4 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Wed, 1 Nov 2023 16:28:17 +0100 Subject: [PATCH 09/19] :honeybee: fix lint issues --- .prettierignore | 1 + packages/@ourworldindata/components/src/SimpleMarkdownText.tsx | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.prettierignore b/.prettierignore index 39ebada93d8..e37f3f047b1 100644 --- a/.prettierignore +++ b/.prettierignore @@ -14,3 +14,4 @@ coverage packages/@ourworldindata/*/dist/ dist/ grapherData/ +.vscode/ diff --git a/packages/@ourworldindata/components/src/SimpleMarkdownText.tsx b/packages/@ourworldindata/components/src/SimpleMarkdownText.tsx index 2a29c9fdf10..315b3b8d018 100644 --- a/packages/@ourworldindata/components/src/SimpleMarkdownText.tsx +++ b/packages/@ourworldindata/components/src/SimpleMarkdownText.tsx @@ -1,7 +1,6 @@ import React from "react" import { computed } from "mobx" import { Remark } from "react-remark" -import visit from "unist-util-visit" type SimpleMarkdownTextProps = { text: string From 3b7029ba1fd7ce800a8b0f4591cf9c4b363077f1 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Wed, 1 Nov 2023 18:28:18 +0100 Subject: [PATCH 10/19] :hammer: translate dod links specially for charts --- .../src/MarkdownTextWrap/MarkdownTextWrap.tsx | 30 +++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx index e09d88dcde3..4ed5d4b7685 100644 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx +++ b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx @@ -59,7 +59,7 @@ export class IRText implements IRToken { return Bounds.forText(this.text, 
this.fontParams).width } @imemo get height(): number { - return this.fontParams?.fontSize || 16 + return this.fontParams?.fontSize || 13 } getBreakpointBefore(): undefined { return undefined @@ -789,14 +789,26 @@ function convertMarkdownNodeToIRTokens( type: "link", }, (item) => { - return [ - new IRLink( - item.url, - item.children.flatMap((child) => - convertMarkdownNodeToIRTokens(child, fontParams) - ) - ), - ] + if (item.url.startsWith("#dod:")) { + const term = item.url.replace("#dod:", "") + return [ + new IRDetailOnDemand( + term, + item.children.flatMap((child) => + convertMarkdownNodeToIRTokens(child, fontParams) + ), + fontParams + ), + ] + } else + return [ + new IRLink( + item.url, + item.children.flatMap((child) => + convertMarkdownNodeToIRTokens(child, fontParams) + ) + ), + ] } ) .with( From b761b44a4fd6c16a37a9a160f0d8e912c81d338d Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Wed, 1 Nov 2023 18:44:28 +0100 Subject: [PATCH 11/19] :bug: fix double whitespace issue --- .../src/MarkdownTextWrap/MarkdownTextWrap.tsx | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx index 4ed5d4b7685..1772b2a69a8 100644 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx +++ b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx @@ -864,14 +864,9 @@ function convertMarkdownNodeToIRTokens( }, (item) => { const splitted = item.value.split(/(\s+)/) - const tokens = splitted.flatMap((text, i) => { - if (i < splitted.length - 1) { - return [ - new IRText(text, fontParams), - new IRWhitespace(fontParams), - ] - } else return [new IRText(text, fontParams)] - }) + const tokens = splitted.map( + (text, i) => new IRText(text, fontParams) + ) return tokens } ) From 75b55951cbb210ffe7300ddf68d2990196e4c9ca Mon Sep 17 00:00:00 2001 From: 
Daniel Bachler Date: Wed, 1 Nov 2023 18:52:57 +0100 Subject: [PATCH 12/19] :bug: fix whitespace svg compatible --- .../src/MarkdownTextWrap/MarkdownTextWrap.tsx | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx index 1772b2a69a8..5e39ef7e499 100644 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx +++ b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx @@ -863,10 +863,15 @@ function convertMarkdownNodeToIRTokens( type: "text", }, (item) => { - const splitted = item.value.split(/(\s+)/) - const tokens = splitted.map( - (text, i) => new IRText(text, fontParams) - ) + const splitted = item.value.split(/\s+/) + const tokens = splitted.flatMap((text, i) => { + if (i < splitted.length - 1) { + return [ + new IRText(text, fontParams), + new IRWhitespace(fontParams), + ] + } else return [new IRText(text, fontParams)] + }) return tokens } ) From 8e6ac97d33f6ba726aa73e364bbd263f162f5451 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Thu, 2 Nov 2023 10:03:02 +0100 Subject: [PATCH 13/19] :sparkles: match old newline behaviour for charts --- .../src/MarkdownTextWrap/MarkdownTextWrap.tsx | 35 ++++++++++++++++--- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx index 5e39ef7e499..d471c456c0f 100644 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx +++ b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx @@ -524,7 +524,31 @@ export class MarkdownTextWrap extends React.Component { } } @computed get text(): string { - return this.props.text + // NOTE: ❗Here we deviate from the normal markdown spec.
We replace \n with \\\n to make sure that single \n are treated as + // actual line breaks but only if none of the other markdown line break rules apply. + // This is a bit different to how markdown usually works but we have a substantial + // amount of legacy charts that use newlines in this way and it seems that it is + // better to support this simple case than to do a data migration of many chart subtitles. + const baseText = this.props.text + // This replace is a bit funky - we want to make sure that single \n are treated as + // actual line breaks but only if none of the other markdown line break rules apply. + // These are: + // - \n\n is always a new paragrah + // - Two spaces before \n is a line break (this rule is not entirely checked as we only check for a single space) + // - A backslash before \n is a line break + // The regex tries to find \n that do not belong to any of the three cases above and if none of those apply we + // replace the \n in the mached 3 characters with a backslash preceding \n to make sure it will be a line break. 
+ // return baseText.replaceAll(/[^\n \\]\n[^\n]/g, (match) => + // match.replace("\n", "\\\n") + // ) + let text = baseText + text = text.replaceAll("\n\n", "@@PARAGRAPH@@") + text = text.replaceAll("\\\n", "@@LINEBREAK@@") + text = text.replaceAll(" \n", "@@LINEBREAK@@") + text = text.replaceAll("\n", "\\\n") + text = text.replaceAll("@@LINEBREAK@@", "\\\n") + text = text.replaceAll("@@PARAGRAPH@@", "\n\n") + return text } @computed get detailsOrderedByReference(): Set { return this.props.detailsOrderedByReference || new Set() @@ -836,9 +860,12 @@ function convertMarkdownNodeToIRTokens( type: "paragraph", }, (item) => { - return item.children.flatMap((child) => - convertMarkdownNodeToIRTokens(child, fontParams) - ) + return [ + ...item.children.flatMap((child) => + convertMarkdownNodeToIRTokens(child, fontParams) + ), + new IRLineBreak(), + ] } ) .with( From b62702165acbe511e4f87b23e5af9238d3897af0 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Thu, 2 Nov 2023 12:40:32 +0100 Subject: [PATCH 14/19] :bug: fix issue with superfluous newline --- .../src/MarkdownTextWrap/MarkdownTextWrap.test.ts | 4 ++-- .../components/src/MarkdownTextWrap/MarkdownTextWrap.tsx | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.test.ts b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.test.ts index 052853722d7..05da1aaffe3 100644 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.test.ts +++ b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.test.ts @@ -63,7 +63,7 @@ describe("MarkdownTextWrap", () => { it("should calculate height correctly", () => { const element = new MarkdownTextWrap({ - text: "a \nb \nc", + text: "a\nb\nc", fontSize: 10, lineHeight: 1.5, }) @@ -83,7 +83,7 @@ describe("MarkdownTextWrap", () => { it("should split on newline", () => { const element = new MarkdownTextWrap({ - text: "_test 
\n** \nnewline \n**_test", + text: "_test\n**\nnewlineyarn \n**_test", fontSize: 10, lineHeight: 1, }) diff --git a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx index d471c456c0f..695ef647483 100644 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx +++ b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx @@ -541,13 +541,12 @@ export class MarkdownTextWrap extends React.Component { // return baseText.replaceAll(/[^\n \\]\n[^\n]/g, (match) => // match.replace("\n", "\\\n") // ) - let text = baseText - text = text.replaceAll("\n\n", "@@PARAGRAPH@@") + let text = baseText.trim() + text = text.replaceAll("\n\n", "@@LINEBREAK@@") text = text.replaceAll("\\\n", "@@LINEBREAK@@") text = text.replaceAll(" \n", "@@LINEBREAK@@") text = text.replaceAll("\n", "\\\n") text = text.replaceAll("@@LINEBREAK@@", "\\\n") - text = text.replaceAll("@@PARAGRAPH@@", "\n\n") return text } @computed get detailsOrderedByReference(): Set { @@ -864,7 +863,6 @@ function convertMarkdownNodeToIRTokens( ...item.children.flatMap((child) => convertMarkdownNodeToIRTokens(child, fontParams) ), - new IRLineBreak(), ] } ) From 63107fd56aa14e94ad0e839c7044321c0501df8d Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Tue, 7 Nov 2023 14:19:57 +0100 Subject: [PATCH 15/19] Update packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx Co-authored-by: Marcel Gerber --- .../components/src/MarkdownTextWrap/MarkdownTextWrap.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx index 695ef647483..17466225639 100644 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx +++ 
b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx @@ -533,11 +533,11 @@ export class MarkdownTextWrap extends React.Component { // This replace is a bit funky - we want to make sure that single \n are treated as // actual line breaks but only if none of the other markdown line break rules apply. // These are: - // - \n\n is always a new paragrah + // - \n\n is always a new paragraph // - Two spaces before \n is a line break (this rule is not entirely checked as we only check for a single space) // - A backslash before \n is a line break // The regex tries to find \n that do not belong to any of the three cases above and if none of those apply we - // replace the \n in the mached 3 characters with a backslash preceding \n to make sure it will be a line break. + // replace the \n in the matched 3 characters with a backslash preceding \n to make sure it will be a line break. // return baseText.replaceAll(/[^\n \\]\n[^\n]/g, (match) => // match.replace("\n", "\\\n") // ) From 1f067e971a1076e9c94cbcfb2c0a0bd6f9b29b06 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Tue, 7 Nov 2023 15:30:49 +0100 Subject: [PATCH 16/19] :sparkles: add list rendering, tweak line breaks --- .../src/MarkdownTextWrap/MarkdownTextWrap.tsx | 38 +++++++++---------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx index 17466225639..818bd656738 100644 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx +++ b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx @@ -524,7 +524,7 @@ export class MarkdownTextWrap extends React.Component { } } @computed get text(): string { - // NOTE: ❗Here we deviate from the normal markdown spec.
We replace \n with \\\n to make sure that single \n are treated as + // NOTE: ❗Here we deviate from the normal markdown spec. We replace \n with \n to make sure that single \n are treated as // actual line breaks but only if none of the other markdown line break rules apply. // This is a bit different to how markdown usually works but we have a substantial // amount of legacy charts that use newlines in this way and it seems that it is @@ -536,17 +536,13 @@ export class MarkdownTextWrap extends React.Component { // - \n\n is always a new paragraph // - Two spaces before \n is a line break (this rule is not entirely checked as we only check for a single space) // - A backslash before \n is a line break - // The regex tries to find \n that do not belong to any of the three cases above and if none of those apply we - // replace the \n in the matched 3 characters with a backslash preceding \n to make sure it will be a line break. - // return baseText.replaceAll(/[^\n \\]\n[^\n]/g, (match) => - // match.replace("\n", "\\\n") - // ) + // The code below normalizes all cases to \n which will lead to them surviving the markdown parsing let text = baseText.trim() text = text.replaceAll("\n\n", "@@LINEBREAK@@") text = text.replaceAll("\\\n", "@@LINEBREAK@@") text = text.replaceAll(" \n", "@@LINEBREAK@@") - text = text.replaceAll("\n", "\\\n") - text = text.replaceAll("@@LINEBREAK@@", "\\\n") + text = text.replaceAll("\n", " \n") + text = text.replaceAll("@@LINEBREAK@@", " \n") return text } @computed get detailsOrderedByReference(): Set { @@ -710,15 +706,8 @@ export function convertMarkdownToIRTokens( markdown: string, fontParams?: IRFontParams ): IRToken[] { - const ast = fromMarkdown(markdown) - return convertMarkdownRootToIRTokens(ast, fontParams) -} - -function convertMarkdownRootToIRTokens( - node: Root, - fontParams?: IRFontParams -): IRToken[] { - return node.children.flatMap((item) => + const ast: Root = fromMarkdown(markdown) + return ast.children.flatMap((item:
Content) => convertMarkdownNodeToIRTokens(item, fontParams) ) } @@ -839,9 +828,18 @@ function convertMarkdownNodeToIRTokens( type: "list", }, (item) => { - return item.children.flatMap((child) => - convertMarkdownNodeToIRTokens(child, fontParams) - ) + if (item.ordered) + return item.children.flatMap((child, index) => [ + new IRLineBreak(), + new IRText(`${index + 1}) `, fontParams), + ...convertMarkdownNodeToIRTokens(child, fontParams), + ]) + else + return item.children.flatMap((child, index) => [ + new IRLineBreak(), + new IRText(`• `, fontParams), + ...convertMarkdownNodeToIRTokens(child, fontParams), + ]) } ) .with( From dbc4f1902f2086786b92895651f473a3595ee50e Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Tue, 7 Nov 2023 16:48:51 +0100 Subject: [PATCH 17/19] :hammer: make sure that there are no leading or trailing line breaks --- .../components/src/MarkdownTextWrap/MarkdownTextWrap.tsx | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx index 818bd656738..2c6c4d4ec3b 100644 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx +++ b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx @@ -15,6 +15,7 @@ import { TextWrap } from "../TextWrap/TextWrap.js" import fromMarkdown from "mdast-util-from-markdown" import type { Root, Content } from "mdast" import { match } from "ts-pattern" +import { dropRightWhile, dropWhile } from "lodash" const SUPERSCRIPT_NUMERALS = { "0": "\u2070", @@ -707,9 +708,14 @@ export function convertMarkdownToIRTokens( fontParams?: IRFontParams ): IRToken[] { const ast: Root = fromMarkdown(markdown) - return ast.children.flatMap((item: Content) => + const children = ast.children.flatMap((item: Content) => convertMarkdownNodeToIRTokens(item, fontParams) ) + // ensure that there are no leading or
trailing line breaks + return dropRightWhile( + dropWhile(children, (token) => token instanceof IRLineBreak), + (token) => token instanceof IRLineBreak + ) } // When using mdast types version 4 this should be typed as: From 8f36fae6e05e12a0205abfa1d2fbb89adfc98a1e Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Tue, 7 Nov 2023 16:52:44 +0100 Subject: [PATCH 18/19] :honeybee: fix prettier issue --- .../components/src/MarkdownTextWrap/MarkdownTextWrap.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx index 2c6c4d4ec3b..b2af10d05e2 100644 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx +++ b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx @@ -841,7 +841,7 @@ function convertMarkdownNodeToIRTokens( ...convertMarkdownNodeToIRTokens(child, fontParams), ]) else - return item.children.flatMap((child, index) => [ + return item.children.flatMap((child) => [ new IRLineBreak(), new IRText(`• `, fontParams), ...convertMarkdownNodeToIRTokens(child, fontParams), From 386387761d1ba8bace9e77d80d938fa6d1d425e4 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Tue, 7 Nov 2023 20:17:37 +0100 Subject: [PATCH 19/19] :hammer: move lodash imports to utils --- .../components/src/MarkdownTextWrap/MarkdownTextWrap.tsx | 3 ++- packages/@ourworldindata/utils/src/Util.ts | 4 ++++ packages/@ourworldindata/utils/src/index.ts | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx index b2af10d05e2..81239787b57 100644 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx +++ b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx
@@ -10,12 +10,13 @@ import { get, Bounds, FontFamily, + dropWhile, + dropRightWhile, } from "@ourworldindata/utils" import { TextWrap } from "../TextWrap/TextWrap.js" import fromMarkdown from "mdast-util-from-markdown" import type { Root, Content } from "mdast" import { match } from "ts-pattern" -import { dropRightWhile, dropWhile } from "lodash" const SUPERSCRIPT_NUMERALS = { "0": "\u2070", diff --git a/packages/@ourworldindata/utils/src/Util.ts b/packages/@ourworldindata/utils/src/Util.ts index ad709a3e01d..c1cf1796c72 100644 --- a/packages/@ourworldindata/utils/src/Util.ts +++ b/packages/@ourworldindata/utils/src/Util.ts @@ -8,6 +8,8 @@ import { debounce, difference, drop, + dropRightWhile, + dropWhile, escapeRegExp, extend, findLastIndex, @@ -76,6 +78,8 @@ export { debounce, difference, drop, + dropRightWhile, + dropWhile, escapeRegExp, extend, findLastIndex, diff --git a/packages/@ourworldindata/utils/src/index.ts b/packages/@ourworldindata/utils/src/index.ts index f9d131ca419..02273e014b1 100644 --- a/packages/@ourworldindata/utils/src/index.ts +++ b/packages/@ourworldindata/utils/src/index.ts @@ -369,6 +369,8 @@ export { debounce, difference, drop, + dropRightWhile, + dropWhile, extend, findLastIndex, flatten,