Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Entry Emulator - Stacked Columns #2649

Merged
merged 2 commits into from
Oct 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 41 additions & 116 deletions db/migrateWpPostsToArchieMl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,135 +20,59 @@ import {
} from "./model/Gdoc/htmlToEnriched.js"
import { getRelatedCharts } from "./wpdb.js"

// Hard-coded slugs to avoid WP dependency
// headerMenu.json minus gdoc topic page slugs and wp topic page slugs
// slugs from all the linear entries we want to migrate from @edomt
// Each slug is listed exactly once, in order of first appearance. A Set
// ignores repeated values, so membership (`entries.has(slug)`) and iteration
// order are the same as they would be for a list containing repeats.
const entries = new Set([
    "population",
    "population-change",
    "age-structure",
    "gender-ratio",
    "life-and-death",
    "life-expectancy",
    "child-mortality",
    "fertility-rate",
    "distribution-of-the-world-population",
    "urbanization",
    "health",
    "health-risks",
    "air-pollution",
    "outdoor-air-pollution",
    "indoor-air-pollution",
    "obesity",
    "smoking",
    "alcohol-consumption",
    "infectious-diseases",
    "monkeypox",
    "coronavirus",
    "hiv-aids",
    "malaria",
    "eradication-of-diseases",
    "smallpox",
    "polio",
    "pneumonia",
    "tetanus",
    "health-institutions-and-interventions",
    "financing-healthcare",
    "vaccination",
    "life-death-health",
    "maternal-mortality",
    "health-meta",
    "causes-of-death",
    "burden-of-disease",
    "cancer",
    "environment",
    "nuclear-energy",
    "energy-access",
    "renewable-energy",
    "fossil-fuels",
    "waste",
    "plastic-pollution",
    "air-and-climate",
    "co2-and-greenhouse-gas-emissions",
    "climate-change",
    "water",
    "clean-water-sanitation",
    "water-access",
    "sanitation",
    "water-use-stress",
    "land-and-ecosystems",
    "forests-and-deforestation",
    "land-use",
    "natural-disasters",
    "food",
    "nutrition",
    "child-labor",
    "corruption",
    "economic-inequality-by-gender",
    "famines",
    "female-labor-supply",
    "fish-and-overfishing",
    "food-supply",
    "government-spending",
    "happiness-and-life-satisfaction",
    "homelessness",
    "human-height",
    "micronutrient-deficiency",
    "diet-compositions",
    "food-production",
    "literacy",
    "marriages-and-divorces",
    "meat-production",
    "agricultural-inputs",
    "employment-in-agriculture",
    "growth-inequality",
    "public-sector",
    "nuclear-weapons",
    "social-connections-and-loneliness",
    "taxation",
    "military-personnel-spending",
    "financing-education",
    "poverty-and-prosperity",
    "economic-inequality",
    "poverty",
    "economic-growth",
    "labor",
    "working-hours",
    "trade-migration",
    "time-use",
    "trade-and-globalization",
    "tourism",
    "education",
    "educational-outcomes",
    "global-education",
    "pre-primary-education",
    "primary-and-secondary-education",
    "quality-of-education",
    "tertiary-education",
    "inputs-to-education",
    "teachers-and-professors",
    "media-education",
    "technology",
    "space-exploration-satellites",
    "transport",
    "work-life",
    "culture",
    "trust",
    "housing",
    "relationships",
    "happiness-wellbeing",
    "human-development-index",
    "politics",
    "human-rights",
    "lgbt-rights",
    "women-rights",
    "democracy",
    "violence-rights",
    "war-peace",
    "biological-and-chemical-weapons",
    "war-and-peace",
    "terrorism",
    "violence",
    "violence-against-rights-for-children",
    "homicides",
])

const migrate = async (): Promise<void> => {
Expand All @@ -167,7 +91,7 @@ const migrate = async (): Promise<void> => {
"excerpt",
"created_at_in_wordpress",
"updated_at"
).from(db.knexTable(Post.postsTable).where("id", "=", "29766"))
).from(db.knexTable(Post.postsTable)) //.where("id", "=", "29766"))

for (const post of posts) {
try {
Expand All @@ -188,6 +112,7 @@ const migrate = async (): Promise<void> => {
shouldParseWpComponents: true,
htmlTagCounts: {},
wpTagCounts: {},
isEntry,
}
const parsedResult = cheerioElementsToArchieML(
bodyContents,
Expand Down
2 changes: 2 additions & 0 deletions db/model/Gdoc/htmlToEnriched.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ it("parses a Wordpress paragraph within the content", () => {
shouldParseWpComponents: true,
htmlTagCounts: {},
wpTagCounts: {},
isEntry: false,
}

const parsedResult = cheerioElementsToArchieML(bodyContents, context)
Expand Down Expand Up @@ -60,6 +61,7 @@ it("parses a Wordpress paragraph as the first element", () => {
shouldParseWpComponents: true,
wpTagCounts: {},
htmlTagCounts: {},
isEntry: false,
})

expect(parsedResult.content).toEqual([
Expand Down
32 changes: 25 additions & 7 deletions db/model/Gdoc/htmlToEnriched.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import {
EnrichedBlockChart,
EnrichedBlockHtml,
EnrichedBlockList,
EnrichedBlockStickyRightContainer,
EnrichedBlockNumberedList,
EnrichedBlockProminentLink,
BlockImageSize,
Expand All @@ -34,6 +33,7 @@ import {
EnrichedBlockCallout,
EnrichedBlockExpandableParagraph,
EnrichedBlockGraySection,
EnrichedBlockStickyRightContainer,
} from "@ourworldindata/utils"
import { match, P } from "ts-pattern"
import {
Expand Down Expand Up @@ -299,6 +299,7 @@ interface ParseContext {
shouldParseWpComponents: boolean
htmlTagCounts: Record<string, number>
wpTagCounts: Record<string, number>
isEntry?: boolean
}

/** Regular expression to identify wordpress components in html components. These
Expand Down Expand Up @@ -449,10 +450,14 @@ export function parseWpComponent(
// tag that we want to ignore then don't try to find a closing tag
if (componentDetails.isVoidElement)
return {
result: finishWpComponent(componentDetails, {
errors: [],
content: [],
}),
result: finishWpComponent(
componentDetails,
{
errors: [],
content: [],
},
context
),
remainingElements: remainingElements,
}
if (wpComponentTagsToIgnore.includes(componentDetails.tagName))
Expand Down Expand Up @@ -488,7 +493,8 @@ export function parseWpComponent(
componentDetails,
withoutEmptyOrWhitespaceOnlyTextBlocks(
collectedChildren
)
),
context
),
remainingElements: remainingElements.slice(1),
}
Expand All @@ -512,7 +518,8 @@ export function parseWpComponent(
we create that - otherwise we keep the WpComponent around with the children content filled in */
function finishWpComponent(
details: WpComponent,
content: BlockParseResult<ArchieBlockOrWpComponent>
content: BlockParseResult<ArchieBlockOrWpComponent>,
context: ParseContext
): BlockParseResult<ArchieBlockOrWpComponent> {
return match(details.tagName)
.with("column", (): BlockParseResult<ArchieBlockOrWpComponent> => {
Expand Down Expand Up @@ -558,6 +565,17 @@ function finishWpComponent(
return { ...content, errors }
}

// For linear entries, we always want them to be a single column
if (context.isEntry) {
return {
errors,
content: convertAllWpComponentsToArchieMLBlocks([
...firstChild.childrenResults,
...secondChild.childrenResults,
]),
}
}

// If both children are empty then we don't want to create a columns block
if (
firstChild.childrenResults.length === 0 &&
Expand Down
Loading