Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Entry Emulator - Summary Block #2638

Merged
merged 7 commits into from
Sep 20, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 135 additions & 2 deletions db/migrateWpPostsToArchieMl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,136 @@ import {
adjustHeadingLevels,
} from "./model/Gdoc/htmlToEnriched.js"

// Hard-coded slugs to avoid WP dependency
const entries = new Set([
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mind explaining how you generated this list of slugs?

"population",
"population-change",
"age-structure",
"gender-ratio",
"life-and-death",
"life-expectancy",
"child-mortality",
"fertility-rate",
"distribution-of-the-world-population",
"urbanization",
"health",
"health-risks",
"air-pollution",
"outdoor-air-pollution",
"indoor-air-pollution",
"obesity",
"smoking",
"alcohol-consumption",
"infectious-diseases",
"monkeypox",
"coronavirus",
"hiv-aids",
"malaria",
"eradication-of-diseases",
"smallpox",
"polio",
"pneumonia",
"tetanus",
"health-institutions-and-interventions",
"financing-healthcare",
"vaccination",
"life-death-health",
"maternal-mortality",
"health-meta",
"causes-of-death",
"burden-of-disease",
"cancer",
"environment",
"nuclear-energy",
"energy-access",
"renewable-energy",
"fossil-fuels",
"waste",
"plastic-pollution",
"air-and-climate",
"co2-and-greenhouse-gas-emissions",
"climate-change",
"water",
"clean-water-sanitation",
"water-access",
"sanitation",
"water-use-stress",
"land-and-ecosystems",
"forests-and-deforestation",
"land-use",
"natural-disasters",
"food",
"nutrition",
"famines",
"food-supply",
"human-height",
"micronutrient-deficiency",
"diet-compositions",
"food-production",
"meat-production",
"agricultural-inputs",
"employment-in-agriculture",
"growth-inequality",
"public-sector",
"government-spending",
"taxation",
"military-personnel-spending",
"financing-education",
"poverty-and-prosperity",
"economic-inequality",
"poverty",
"economic-growth",
"economic-inequality-by-gender",
"labor",
"child-labor",
"working-hours",
"female-labor-supply",
"corruption",
"trade-migration",
"trade-and-globalization",
"tourism",
"education",
"educational-outcomes",
"global-education",
"literacy",
"pre-primary-education",
"primary-and-secondary-education",
"quality-of-education",
"tertiary-education",
"inputs-to-education",
"teachers-and-professors",
"media-education",
"technology",
"space-exploration-satellites",
"transport",
"work-life",
"culture",
"trust",
"housing",
"homelessness",
"time-use",
"relationships",
"marriages-and-divorces",
"social-connections-and-loneliness",
"happiness-wellbeing",
"happiness-and-life-satisfaction",
"human-development-index",
"politics",
"human-rights",
"lgbt-rights",
"women-rights",
"democracy",
"violence-rights",
"war-peace",
"biological-and-chemical-weapons",
"war-and-peace",
"terrorism",
"nuclear-weapons",
"violence",
"violence-against-rights-for-children",
"homicides",
])

const migrate = async (): Promise<void> => {
const writeToFile = false
const errors = []
Expand All @@ -33,7 +163,7 @@ const migrate = async (): Promise<void> => {
"excerpt",
"created_at_in_wordpress",
"updated_at"
).from(db.knexTable(Post.postsTable)) //.where("id", "=", "22821"))
).from(db.knexTable(Post.postsTable)) //.where("id", "=", "38189")

for (const post of posts) {
try {
Expand Down Expand Up @@ -83,6 +213,7 @@ const migrate = async (): Promise<void> => {
slug: post.slug,
content: {
body: archieMlBodyElements,
toc: [],
title: post.title,
subtitle: post.excerpt,
excerpt: post.excerpt,
Expand All @@ -92,7 +223,9 @@ const migrate = async (): Promise<void> => {
dateline: dateline,
// TODO: this discards block level elements - those might be needed?
refs: undefined,
type: OwidGdocType.Article,
type: entries.has(post.slug)
? OwidGdocType.TopicPage
: OwidGdocType.Article,
},
published: false,
createdAt:
Expand Down
1 change: 1 addition & 0 deletions db/model/Gdoc/Gdoc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,7 @@ export class Gdoc extends BaseEntity implements OwidGdocInterface {
"aside",
"callout",
"expandable-paragraph",
"entry-summary",
"gray-section",
"heading",
"horizontal-rule",
Expand Down
9 changes: 9 additions & 0 deletions db/model/Gdoc/enrichedToRaw.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import {
EnrichedBlockResearchAndWritingLink,
RawBlockResearchAndWritingLink,
RawBlockAlign,
RawBlockEntrySummary,
} from "@ourworldindata/utils"
import { spanToHtmlString } from "./gdocUtils.js"
import { match, P } from "ts-pattern"
Expand Down Expand Up @@ -372,5 +373,13 @@ export function enrichedBlockToRawBlock(
},
}
})
.with({ type: "entry-summary" }, (b): RawBlockEntrySummary => {
return {
type: b.type,
value: {
items: b.items,
},
}
})
.exhaustive()
}
5 changes: 5 additions & 0 deletions db/model/Gdoc/exampleEnrichedBlocks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -418,4 +418,9 @@ export const enrichedBlockExamples: Record<
content: [enrichedBlockText],
parseErrors: [],
},
"entry-summary": {
type: "entry-summary",
items: [{ text: "Hello", slug: "#link-to-something" }],
parseErrors: [],
},
}
112 changes: 108 additions & 4 deletions db/model/Gdoc/htmlToEnriched.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,22 @@ import {
EnrichedBlockProminentLink,
BlockImageSize,
detailOnDemandRegex,
EnrichedBlockEntrySummary,
EnrichedBlockEntrySummaryItem,
spansToUnformattedPlainText,
checkNodeIsSpanLink,
Url,
EnrichedBlockCallout,
} from "@ourworldindata/utils"
import { match, P } from "ts-pattern"
import { compact, flatten, isPlainObject, partition } from "lodash"
import {
compact,
flatten,
get,
isArray,
isPlainObject,
partition,
} from "lodash"
import cheerio from "cheerio"
import { spansToSimpleString } from "./gdocUtils.js"

Expand Down Expand Up @@ -223,6 +236,10 @@ type ErrorNames =
| "unhandled html tag found"
| "prominent link missing title"
| "prominent link missing url"
| "summary item isn't text"
| "summary item doesn't have link"
| "summary item has DataValue"
| "Unknown content type inside summary block"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: remove capitalization for consistency


interface BlockParseError {
name: ErrorNames
Expand Down Expand Up @@ -337,11 +354,12 @@ function isArchieMlComponent(
export function convertAllWpComponentsToArchieMLBlocks(
blocksOrComponents: ArchieBlockOrWpComponent[]
): OwidEnrichedGdocBlock[] {
return blocksOrComponents.flatMap((blockOrComponent) => {
if (isArchieMlComponent(blockOrComponent)) return [blockOrComponent]
return blocksOrComponents.flatMap((blockOrComponentOrToc) => {
if (isArchieMlComponent(blockOrComponentOrToc))
return [blockOrComponentOrToc]
else {
return convertAllWpComponentsToArchieMLBlocks(
blockOrComponent.childrenResults
blockOrComponentOrToc.childrenResults
)
}
})
Expand Down Expand Up @@ -596,6 +614,92 @@ function finishWpComponent(
}
} else return { ...content, errors }
})
.with("owid/summary", () => {
// Summaries can either be lists of anchor links, or paragraphs of text
// If it's a paragraph of text, we want to turn it into a callout block
// If it's a list of anchor links, we want to turn it into a toc block
Comment on lines +618 to +620
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When a summary block is both text and list (e.g. https://ourworldindata.org/vaccination), both conditions get skipped below and the summary gets stripped out of the output.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, we don't really have a way to render a similar component in Gdocs. Instead we'll track it and deal with it manually.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah right, I'd missed the last error

const contentIsAllText =
content.content.find(
(block) => "type" in block && block.type !== "text"
) === undefined

if (contentIsAllText) {
const callout: EnrichedBlockCallout = {
type: "callout",
title: "Summary",
text: content.content as EnrichedBlockText[],
parseErrors: [],
}
return { errors: [], content: [callout] }
}

const contentIsList =
content.content.length === 1 &&
"type" in content.content[0] &&
content.content[0].type === "list"
if (contentIsList) {
const listItems = get(content, ["content", 0])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that "items" is missing from the path, so list-based summary blocks (e.g. https://ourworldindata.org/working-hours) return an empty block (which is nevertheless visible and expandable).

Suggested change
const listItems = get(content, ["content", 0])
const listItems = get(content, ["content", 0, "items"])

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oof! Sorry about that! You're right, I removed it for the callout refactor and then forgot to change it back.

const items: EnrichedBlockEntrySummaryItem[] = []
const errors = content.errors
if (isArray(listItems)) {
listItems.forEach((item) => {
if (item.type === "text") {
const value = item.value[0]
if (checkNodeIsSpanLink(value)) {
const { hash } = Url.fromURL(value.url)
const text = spansToUnformattedPlainText(
value.children
)
if (text.includes("DataValue")) {
errors.push({
name: "summary item has DataValue",
details: text,
})
}
items.push({
// Remove "#" from the beginning of the slug
slug: hash.slice(1),
text: text,
})
} else {
errors.push({
name: "summary item doesn't have link",
details: value
? `spanType is ${value.spanType}`
: "No item",
})
}
} else {
errors.push({
name: "summary item isn't text",
details: `item is type: ${item.type}`,
})
}
})
}
const toc: EnrichedBlockEntrySummary = {
type: "entry-summary",
items,
parseErrors: [],
}
return { errors: [], content: [toc] }
}

const error: BlockParseError = {
name: "Unknown content type inside summary block",
details:
"Unknown summary content: " +
content.content
.map((block) =>
"type" in block ? block.type : block.tagName
)
.join(", "),
}
return {
errors: [error],
content: [],
}
})
.otherwise(() => {
return {
errors: [
Expand Down
17 changes: 17 additions & 0 deletions db/model/Gdoc/rawToArchie.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import {
RawBlockTopicPageIntro,
RawBlockExpandableParagraph,
RawBlockAlign,
RawBlockEntrySummary,
} from "@ourworldindata/utils"
import { match } from "ts-pattern"

Expand Down Expand Up @@ -522,6 +523,21 @@ function* rawBlockAlignToArchieMLString(
yield "{}"
}

/**
 * Serializes a raw entry-summary block as ArchieML, yielding one string at a
 * time.
 *
 * Output order: the `{.entry-summary}` marker, the `[.items]` marker, then for
 * each item whatever `propertyToArchieMLString` yields for its `text` property
 * followed by its `slug` property, and finally the `[]` and `{}` markers.
 * When `block.value.items` is falsy, no item lines are produced but all four
 * markers are still emitted.
 */
function* rawBlockEntrySummaryToArchieMLString(
    block: RawBlockEntrySummary
): Generator<string, void, undefined> {
    yield "{.entry-summary}"
    yield "[.items]"

    // Fall back to an empty list so the markers below are always emitted.
    const summaryItems = block.value.items || []
    for (const summaryItem of summaryItems) {
        yield* propertyToArchieMLString("text", summaryItem)
        yield* propertyToArchieMLString("slug", summaryItem)
    }

    yield "[]"
    yield "{}"
}

export function* OwidRawGdocBlockToArchieMLStringGenerator(
block: OwidRawGdocBlock
): Generator<string, void, undefined> {
Expand Down Expand Up @@ -581,6 +597,7 @@ export function* OwidRawGdocBlockToArchieMLStringGenerator(
rawResearchAndWritingToArchieMLString
)
.with({ type: "align" }, rawBlockAlignToArchieMLString)
.with({ type: "entry-summary" }, rawBlockEntrySummaryToArchieMLString)
.exhaustive()
yield* content
}
Expand Down
Loading