diff --git a/package.json b/package.json index d99adf85805..e0e71838036 100644 --- a/package.json +++ b/package.json @@ -155,6 +155,7 @@ "lodash": "^4.17.20", "mathjax-full": "^3.1.0", "md5": "^2.3.0", + "mdast-util-find-and-replace": "1.1.1", "mdast-util-from-markdown": "^0.8.0", "minimist": "^1.2.6", "mobx": "^5.15.7", diff --git a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx index 81239787b57..d5b997c5dd3 100644 --- a/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx +++ b/packages/@ourworldindata/components/src/MarkdownTextWrap/MarkdownTextWrap.tsx @@ -17,6 +17,7 @@ import { TextWrap } from "../TextWrap/TextWrap.js" import fromMarkdown from "mdast-util-from-markdown" import type { Root, Content } from "mdast" import { match } from "ts-pattern" +import { urlRegex } from "../markdown/remarkPlainLinks.js" const SUPERSCRIPT_NUMERALS = { "0": "\u2070", @@ -895,12 +896,13 @@ function convertMarkdownNodeToIRTokens( (item) => { const splitted = item.value.split(/\s+/) const tokens = splitted.flatMap((text, i) => { + const textNode = new IRText(text, fontParams) + const node = text.match(urlRegex) + ? 
new IRLink(text, [textNode], fontParams) + : textNode if (i < splitted.length - 1) { - return [ - new IRText(text, fontParams), - new IRWhitespace(fontParams), - ] - } else return [new IRText(text, fontParams)] + return [node, new IRWhitespace(fontParams)] + } else return [node] }) return tokens } diff --git a/packages/@ourworldindata/components/src/SimpleMarkdownText.tsx b/packages/@ourworldindata/components/src/SimpleMarkdownText.tsx index 315b3b8d018..ab2f6d9b55e 100644 --- a/packages/@ourworldindata/components/src/SimpleMarkdownText.tsx +++ b/packages/@ourworldindata/components/src/SimpleMarkdownText.tsx @@ -1,6 +1,7 @@ import React from "react" import { computed } from "mobx" import { Remark } from "react-remark" +import { remarkPlainLinks } from "./markdown/remarkPlainLinks.js" type SimpleMarkdownTextProps = { text: string @@ -12,6 +13,6 @@ export class SimpleMarkdownText extends React.Component } render(): JSX.Element | null { - return {this.text} + return {this.text} } } diff --git a/packages/@ourworldindata/components/src/markdown/mdast-util-find-and-replace.d.ts b/packages/@ourworldindata/components/src/markdown/mdast-util-find-and-replace.d.ts new file mode 100644 index 00000000000..a338cd9b17e --- /dev/null +++ b/packages/@ourworldindata/components/src/markdown/mdast-util-find-and-replace.d.ts @@ -0,0 +1 @@ +declare module "mdast-util-find-and-replace" diff --git a/packages/@ourworldindata/components/src/markdown/remarkPlainLinks.ts b/packages/@ourworldindata/components/src/markdown/remarkPlainLinks.ts new file mode 100644 index 00000000000..b0b112b58cf --- /dev/null +++ b/packages/@ourworldindata/components/src/markdown/remarkPlainLinks.ts @@ -0,0 +1,37 @@ +import findAndReplace from "mdast-util-find-and-replace" + +// This regex matches: +// "http" +// an optional "s" +// two / characters +// The subdomains and hostname: Any word or numeric character or "_" or "-" one or more times followed by a period +// The TLD: Any word or numeric character or "_" 
or "-" one or more times +// The path, query string and fragment: A forward slash followed by any word or numeric character (unicode classes so umlauts like ö match +// as well as any of the following: .+?:%&=~#) zero or more times. Note that we exclude space even though that is valid in a URL but it tends +// to make the match too greedy. +// We match the same subgroup [\p{L}\p{N}_\-.\+/?:%&=~#] twice, once with a * and then exactly once but without the punctuation characters .?: +// This is to make sure that we don't match trailing punctuation as part of the URL ("This is an http://example.com." - note that the trailing +// period should not be part of the URL) +// Finally, the very last part is a lone forward slash which would not be matched by the previous subgroup. +export const urlRegex = + /https?:\/\/([\w-]+\.)+[\w-]+((\/[\p{L}\p{N}_\-.\+/?:%&=~#]*[\p{L}\p{N}_\-\+/%&=~#])|\/)?/gu + +export function remarkPlainLinks() { + const turnIntoLink = (value: any, _match: string) => { + return [ + { + type: "link", + url: value, + children: [ + { + type: "text", + value: value, + }, + ], + }, + ] + } + return (tree: any) => { + findAndReplace(tree, [[urlRegex, turnIntoLink]]) + } +} diff --git a/yarn.lock b/yarn.lock index 2ae9655b992..10bdcd50dff 100644 --- a/yarn.lock +++ b/yarn.lock @@ -10518,6 +10518,7 @@ __metadata: lodash: "npm:^4.17.20" mathjax-full: "npm:^3.1.0" md5: "npm:^2.3.0" + mdast-util-find-and-replace: "npm:1.1.1" mdast-util-from-markdown: "npm:^0.8.0" minimist: "npm:^1.2.6" mobx: "npm:^5.15.7" @@ -13597,6 +13598,17 @@ __metadata: languageName: node linkType: hard +"mdast-util-find-and-replace@npm:1.1.1": + version: 1.1.1 + resolution: "mdast-util-find-and-replace@npm:1.1.1" + dependencies: + escape-string-regexp: "npm:^4.0.0" + unist-util-is: "npm:^4.0.0" + unist-util-visit-parents: "npm:^3.0.0" + checksum: e4c9e50d9bce5ae4c728a925bd60080b94d16aaa312c27e2b70b16ddc29a5d0a0844d6e18efaef08aeb22c68303ec528f20183d1b0420504a0c2c1710cebd76f + languageName: 
node + linkType: hard + "mdast-util-from-markdown@npm:^0.8.0": version: 0.8.5 resolution: "mdast-util-from-markdown@npm:0.8.5"