Skip to content

Commit

Permalink
Merge pull request #2638 from owid/entry-emulator
Browse files: browse the repository at this point in the history
Entry Emulator - Summary Block
  • Loading branch information
ikesau authored Sep 20, 2023
2 parents a0a4cbb + 527dc2a commit c4eae75
Show file tree
Hide file tree
Showing 15 changed files with 385 additions and 33 deletions.
137 changes: 135 additions & 2 deletions db/migrateWpPostsToArchieMl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,136 @@ import {
adjustHeadingLevels,
} from "./model/Gdoc/htmlToEnriched.js"

// Slugs of the posts that should be treated as entries.
// The list is hard-coded to avoid a WP dependency.
// The migration below looks up each post's slug in this set: posts whose slug
// is listed here are given the type OwidGdocType.TopicPage, all other posts
// are given OwidGdocType.Article.
const entries = new Set([
"population",
"population-change",
"age-structure",
"gender-ratio",
"life-and-death",
"life-expectancy",
"child-mortality",
"fertility-rate",
"distribution-of-the-world-population",
"urbanization",
"health",
"health-risks",
"air-pollution",
"outdoor-air-pollution",
"indoor-air-pollution",
"obesity",
"smoking",
"alcohol-consumption",
"infectious-diseases",
"monkeypox",
"coronavirus",
"hiv-aids",
"malaria",
"eradication-of-diseases",
"smallpox",
"polio",
"pneumonia",
"tetanus",
"health-institutions-and-interventions",
"financing-healthcare",
"vaccination",
"life-death-health",
"maternal-mortality",
"health-meta",
"causes-of-death",
"burden-of-disease",
"cancer",
"environment",
"nuclear-energy",
"energy-access",
"renewable-energy",
"fossil-fuels",
"waste",
"plastic-pollution",
"air-and-climate",
"co2-and-greenhouse-gas-emissions",
"climate-change",
"water",
"clean-water-sanitation",
"water-access",
"sanitation",
"water-use-stress",
"land-and-ecosystems",
"forests-and-deforestation",
"land-use",
"natural-disasters",
"food",
"nutrition",
"famines",
"food-supply",
"human-height",
"micronutrient-deficiency",
"diet-compositions",
"food-production",
"meat-production",
"agricultural-inputs",
"employment-in-agriculture",
"growth-inequality",
"public-sector",
"government-spending",
"taxation",
"military-personnel-spending",
"financing-education",
"poverty-and-prosperity",
"economic-inequality",
"poverty",
"economic-growth",
"economic-inequality-by-gender",
"labor",
"child-labor",
"working-hours",
"female-labor-supply",
"corruption",
"trade-migration",
"trade-and-globalization",
"tourism",
"education",
"educational-outcomes",
"global-education",
"literacy",
"pre-primary-education",
"primary-and-secondary-education",
"quality-of-education",
"tertiary-education",
"inputs-to-education",
"teachers-and-professors",
"media-education",
"technology",
"space-exploration-satellites",
"transport",
"work-life",
"culture",
"trust",
"housing",
"homelessness",
"time-use",
"relationships",
"marriages-and-divorces",
"social-connections-and-loneliness",
"happiness-wellbeing",
"happiness-and-life-satisfaction",
"human-development-index",
"politics",
"human-rights",
"lgbt-rights",
"women-rights",
"democracy",
"violence-rights",
"war-peace",
"biological-and-chemical-weapons",
"war-and-peace",
"terrorism",
"nuclear-weapons",
"violence",
"violence-against-rights-for-children",
"homicides",
])

const migrate = async (): Promise<void> => {
const writeToFile = false
const errors = []
Expand All @@ -33,7 +163,7 @@ const migrate = async (): Promise<void> => {
"excerpt",
"created_at_in_wordpress",
"updated_at"
).from(db.knexTable(Post.postsTable)) //.where("id", "=", "22821"))
).from(db.knexTable(Post.postsTable)) //.where("id", "=", "38189")

for (const post of posts) {
try {
Expand Down Expand Up @@ -83,6 +213,7 @@ const migrate = async (): Promise<void> => {
slug: post.slug,
content: {
body: archieMlBodyElements,
toc: [],
title: post.title,
subtitle: post.excerpt,
excerpt: post.excerpt,
Expand All @@ -92,7 +223,9 @@ const migrate = async (): Promise<void> => {
dateline: dateline,
// TODO: this discards block level elements - those might be needed?
refs: undefined,
type: OwidGdocType.Article,
type: entries.has(post.slug)
? OwidGdocType.TopicPage
: OwidGdocType.Article,
},
published: false,
createdAt:
Expand Down
1 change: 1 addition & 0 deletions db/model/Gdoc/Gdoc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,7 @@ export class Gdoc extends BaseEntity implements OwidGdocInterface {
"aside",
"callout",
"expandable-paragraph",
"entry-summary",
"gray-section",
"heading",
"horizontal-rule",
Expand Down
9 changes: 9 additions & 0 deletions db/model/Gdoc/enrichedToRaw.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import {
EnrichedBlockResearchAndWritingLink,
RawBlockResearchAndWritingLink,
RawBlockAlign,
RawBlockEntrySummary,
} from "@ourworldindata/utils"
import { spanToHtmlString } from "./gdocUtils.js"
import { match, P } from "ts-pattern"
Expand Down Expand Up @@ -372,5 +373,13 @@ export function enrichedBlockToRawBlock(
},
}
})
.with({ type: "entry-summary" }, (b): RawBlockEntrySummary => {
return {
type: b.type,
value: {
items: b.items,
},
}
})
.exhaustive()
}
5 changes: 5 additions & 0 deletions db/model/Gdoc/exampleEnrichedBlocks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -418,4 +418,9 @@ export const enrichedBlockExamples: Record<
content: [enrichedBlockText],
parseErrors: [],
},
"entry-summary": {
type: "entry-summary",
items: [{ text: "Hello", slug: "#link-to-something" }],
parseErrors: [],
},
}
112 changes: 108 additions & 4 deletions db/model/Gdoc/htmlToEnriched.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,22 @@ import {
EnrichedBlockProminentLink,
BlockImageSize,
detailOnDemandRegex,
EnrichedBlockEntrySummary,
EnrichedBlockEntrySummaryItem,
spansToUnformattedPlainText,
checkNodeIsSpanLink,
Url,
EnrichedBlockCallout,
} from "@ourworldindata/utils"
import { match, P } from "ts-pattern"
import { compact, flatten, isPlainObject, partition } from "lodash"
import {
compact,
flatten,
get,
isArray,
isPlainObject,
partition,
} from "lodash"
import cheerio from "cheerio"
import { spansToSimpleString } from "./gdocUtils.js"

Expand Down Expand Up @@ -223,6 +236,10 @@ type ErrorNames =
| "unhandled html tag found"
| "prominent link missing title"
| "prominent link missing url"
| "summary item isn't text"
| "summary item doesn't have link"
| "summary item has DataValue"
| "unknown content type inside summary block"

interface BlockParseError {
name: ErrorNames
Expand Down Expand Up @@ -337,11 +354,12 @@ function isArchieMlComponent(
export function convertAllWpComponentsToArchieMLBlocks(
blocksOrComponents: ArchieBlockOrWpComponent[]
): OwidEnrichedGdocBlock[] {
return blocksOrComponents.flatMap((blockOrComponent) => {
if (isArchieMlComponent(blockOrComponent)) return [blockOrComponent]
return blocksOrComponents.flatMap((blockOrComponentOrToc) => {
if (isArchieMlComponent(blockOrComponentOrToc))
return [blockOrComponentOrToc]
else {
return convertAllWpComponentsToArchieMLBlocks(
blockOrComponent.childrenResults
blockOrComponentOrToc.childrenResults
)
}
})
Expand Down Expand Up @@ -596,6 +614,92 @@ function finishWpComponent(
}
} else return { ...content, errors }
})
.with("owid/summary", () => {
    // A summary component holds either paragraphs of text or a single list
    // of anchor links:
    //   - only text blocks -> one "callout" block titled "Summary"
    //   - exactly one list -> one "entry-summary" block, where every list
    //     item contributes its link text and the hash of its link URL
    //   - anything else    -> no content and an "unknown content type
    //     inside summary block" error
    const contentIsAllText = content.content.every(
        (block) => !("type" in block) || block.type === "text"
    )

    if (contentIsAllText) {
        const callout: EnrichedBlockCallout = {
            type: "callout",
            title: "Summary",
            text: content.content as EnrichedBlockText[],
            parseErrors: [],
        }
        return { errors: [], content: [callout] }
    }

    const contentIsList =
        content.content.length === 1 &&
        "type" in content.content[0] &&
        content.content[0].type === "list"
    if (contentIsList) {
        const listItems = get(content, ["content", 0, "items"])
        const items: EnrichedBlockEntrySummaryItem[] = []
        // Work on a copy so that recording item-level problems does not
        // mutate the parse result that was passed in.
        const errors = [...content.errors]
        if (isArray(listItems)) {
            for (const item of listItems) {
                if (item.type !== "text") {
                    errors.push({
                        name: "summary item isn't text",
                        details: `item is type: ${item.type}`,
                    })
                    continue
                }

                // Only the first span of the item is inspected; it is
                // expected to be the anchor link.
                const value = item.value[0]
                // Guard against an empty item before handing it to the
                // span check, so it is reported as "No item" below.
                if (value == null || !checkNodeIsSpanLink(value)) {
                    errors.push({
                        name: "summary item doesn't have link",
                        details: value
                            ? `spanType is ${value.spanType}`
                            : "No item",
                    })
                    continue
                }

                const { hash } = Url.fromURL(value.url)
                const text = spansToUnformattedPlainText(value.children)
                if (text.includes("DataValue")) {
                    // Reported, but the item is still kept.
                    errors.push({
                        name: "summary item has DataValue",
                        details: text,
                    })
                }
                items.push({
                    // Remove "#" from the beginning of the slug
                    slug: hash.slice(1),
                    text: text,
                })
            }
        }
        const toc: EnrichedBlockEntrySummary = {
            type: "entry-summary",
            items,
            parseErrors: [],
        }
        // Return the collected errors instead of an empty list, otherwise
        // every problem recorded above would be silently dropped.
        return { errors, content: [toc] }
    }

    const error: BlockParseError = {
        name: "unknown content type inside summary block",
        details:
            "Unknown summary content: " +
            content.content
                .map((block) =>
                    "type" in block ? block.type : block.tagName
                )
                .join(", "),
    }
    return {
        errors: [error],
        content: [],
    }
})
.otherwise(() => {
return {
errors: [
Expand Down
17 changes: 17 additions & 0 deletions db/model/Gdoc/rawToArchie.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import {
RawBlockTopicPageIntro,
RawBlockExpandableParagraph,
RawBlockAlign,
RawBlockEntrySummary,
} from "@ourworldindata/utils"
import { match } from "ts-pattern"

Expand Down Expand Up @@ -522,6 +523,21 @@ function* rawBlockAlignToArchieMLString(
yield "{}"
}

/**
 * Yields the ArchieML lines for an entry-summary block: the opening
 * `{.entry-summary}` and `[.items]` markers, then the `text` and `slug`
 * properties of every item (when the block has items), and finally the
 * closing `[]` and `{}` markers.
 */
function* rawBlockEntrySummaryToArchieMLString(
    block: RawBlockEntrySummary
): Generator<string, void, undefined> {
    yield* ["{.entry-summary}", "[.items]"]

    // A missing item list is serialised as an empty array.
    const summaryItems = block.value.items || []
    for (const summaryItem of summaryItems) {
        yield* propertyToArchieMLString("text", summaryItem)
        yield* propertyToArchieMLString("slug", summaryItem)
    }

    yield* ["[]", "{}"]
}

export function* OwidRawGdocBlockToArchieMLStringGenerator(
block: OwidRawGdocBlock
): Generator<string, void, undefined> {
Expand Down Expand Up @@ -581,6 +597,7 @@ export function* OwidRawGdocBlockToArchieMLStringGenerator(
rawResearchAndWritingToArchieMLString
)
.with({ type: "align" }, rawBlockAlignToArchieMLString)
.with({ type: "entry-summary" }, rawBlockEntrySummaryToArchieMLString)
.exhaustive()
yield* content
}
Expand Down
Loading

0 comments on commit c4eae75

Please sign in to comment.