diff --git a/db/model/Gdoc/htmlToEnriched.ts b/db/model/Gdoc/htmlToEnriched.ts index 80a2ba4a502..6cc8fc13ab0 100644 --- a/db/model/Gdoc/htmlToEnriched.ts +++ b/db/model/Gdoc/htmlToEnriched.ts @@ -34,6 +34,7 @@ import { EnrichedBlockGraySection, EnrichedBlockStickyRightContainer, EnrichedBlockBlockquote, + traverseEnrichedSpan, } from "@ourworldindata/utils" import { match, P } from "ts-pattern" import { @@ -789,6 +790,39 @@ function finishWpComponent( }) } /** Heuristically converts the spans of a blockquote into a prominent link. Returns undefined unless the plain text of the spans (spansToSimpleString, lower-cased) contains "related chart". Otherwise every span is walked with traverseEnrichedSpan and each span whose spanType is "span-link" and whose url includes "/grapher/" overwrites the remembered url, so when several links match, the one visited last is kept (visit order is whatever traverseEnrichedSpan uses; not visible here). If at least one link matched, the result is a "prominent-link" block carrying that url and an empty parseErrors array; if none matched, undefined is returned. */ +function extractProminentLinkFromBlockQuote( + spans: Span[] +): EnrichedBlockProminentLink | undefined { + const spansContainRelatedChart = spansToSimpleString(spans) + .toLowerCase() + .includes("related chart") + + if (!spansContainRelatedChart) return undefined + + let isRelatedChart = false + let url = "" + + spans.forEach((span) => + traverseEnrichedSpan(span, (span) => { + if ( + span.spanType === "span-link" && + span.url.includes("/grapher/") + ) { + url = span.url + isRelatedChart = true + } + }) + ) + + if (isRelatedChart) + return { + type: "prominent-link", + url, + parseErrors: [], + } + return +} + function isEnrichedTextBlock( item: ArchieBlockOrWpComponent ): item is EnrichedBlockText { @@ -822,9 +856,20 @@ function cheerioToArchieML( .with({ tagName: "address" }, unwrapElementWithContext) .with( { tagName: "blockquote" }, - (): BlockParseResult => { + (): BlockParseResult< + EnrichedBlockBlockquote | EnrichedBlockProminentLink + > => { const spansResult = getSpansFromChildren(element, context) - + // Sometimes blockquotes were used for prominent links before we had a bespoke + // component for them. Using some simple heuristics we try to convert these if possible + const prominentLink = extractProminentLinkFromBlockQuote( + spansResult.content + ) /* NOTE(review): the early return below reports errors: [] instead of spansResult.errors, so any errors collected while parsing the blockquote's spans are not surfaced when the blockquote is converted to a prominent link. Presumably intentional because the spans are discarded; confirm. */ + if (prominentLink) + return { + errors: [], + content: [prominentLink], + } return { errors: spansResult.errors, content: [