Skip to content

Commit

Permalink
✨ use official list of entry slugs for wp migration, conditionally co…
Browse files Browse the repository at this point in the history
…nsolidate columns for linear entries only
  • Loading branch information
ikesau committed Oct 13, 2023
1 parent df6b01f commit 6744d5d
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 130 deletions.
157 changes: 41 additions & 116 deletions db/migrateWpPostsToArchieMl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,135 +20,59 @@ import {
} from "./model/Gdoc/htmlToEnriched.js"
import { getRelatedCharts } from "./wpdb.js"

// Hard-coded slugs to avoid WP dependency
// headerMenu.json minus gdoc topic page slugs and wp topic page slugs
// slugs from all the linear entries we want to migrate from @edomt
//
// Each slug is listed exactly once, in the order it is first needed. Because
// this is a Set, repeating a slug would have no effect on membership, size or
// iteration order, so repeats are kept out to make the list's length match
// the Set's size.
const entries = new Set([
    "population",
    "population-change",
    "age-structure",
    "gender-ratio",
    "life-and-death",
    "life-expectancy",
    "child-mortality",
    "fertility-rate",
    "distribution-of-the-world-population",
    "urbanization",
    "health",
    "health-risks",
    "air-pollution",
    "outdoor-air-pollution",
    "indoor-air-pollution",
    "obesity",
    "smoking",
    "alcohol-consumption",
    "infectious-diseases",
    "monkeypox",
    "coronavirus",
    "hiv-aids",
    "malaria",
    "eradication-of-diseases",
    "smallpox",
    "polio",
    "pneumonia",
    "tetanus",
    "health-institutions-and-interventions",
    "financing-healthcare",
    "vaccination",
    "life-death-health",
    "maternal-mortality",
    "health-meta",
    "causes-of-death",
    "burden-of-disease",
    "cancer",
    "environment",
    "nuclear-energy",
    "energy-access",
    "renewable-energy",
    "fossil-fuels",
    "waste",
    "plastic-pollution",
    "air-and-climate",
    "co2-and-greenhouse-gas-emissions",
    "climate-change",
    "water",
    "clean-water-sanitation",
    "water-access",
    "sanitation",
    "water-use-stress",
    "land-and-ecosystems",
    "forests-and-deforestation",
    "land-use",
    "natural-disasters",
    "food",
    "nutrition",
    "child-labor",
    "corruption",
    "economic-inequality-by-gender",
    "famines",
    "female-labor-supply",
    "fish-and-overfishing",
    "food-supply",
    "government-spending",
    "happiness-and-life-satisfaction",
    "homelessness",
    "human-height",
    "micronutrient-deficiency",
    "diet-compositions",
    "food-production",
    "literacy",
    "marriages-and-divorces",
    "meat-production",
    "agricultural-inputs",
    "employment-in-agriculture",
    "growth-inequality",
    "public-sector",
    "nuclear-weapons",
    "social-connections-and-loneliness",
    "taxation",
    "military-personnel-spending",
    "financing-education",
    "poverty-and-prosperity",
    "economic-inequality",
    "poverty",
    "economic-growth",
    "labor",
    "working-hours",
    "trade-migration",
    "time-use",
    "trade-and-globalization",
    "tourism",
    "education",
    "educational-outcomes",
    "global-education",
    "pre-primary-education",
    "primary-and-secondary-education",
    "quality-of-education",
    "tertiary-education",
    "inputs-to-education",
    "teachers-and-professors",
    "media-education",
    "technology",
    "space-exploration-satellites",
    "transport",
    "work-life",
    "culture",
    "trust",
    "housing",
    "relationships",
    "happiness-wellbeing",
    "human-development-index",
    "politics",
    "human-rights",
    "lgbt-rights",
    "women-rights",
    "democracy",
    "violence-rights",
    "war-peace",
    "biological-and-chemical-weapons",
    "war-and-peace",
    "terrorism",
    "violence",
    "violence-against-rights-for-children",
    "homicides",
])

const migrate = async (): Promise<void> => {
Expand All @@ -167,7 +91,7 @@ const migrate = async (): Promise<void> => {
"excerpt",
"created_at_in_wordpress",
"updated_at"
).from(db.knexTable(Post.postsTable).where("id", "=", "29766"))
).from(db.knexTable(Post.postsTable)) //.where("id", "=", "29766"))

for (const post of posts) {
try {
Expand All @@ -188,6 +112,7 @@ const migrate = async (): Promise<void> => {
shouldParseWpComponents: true,
htmlTagCounts: {},
wpTagCounts: {},
isEntry,
}
const parsedResult = cheerioElementsToArchieML(
bodyContents,
Expand Down
2 changes: 2 additions & 0 deletions db/model/Gdoc/htmlToEnriched.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ it("parses a Wordpress paragraph within the content", () => {
shouldParseWpComponents: true,
htmlTagCounts: {},
wpTagCounts: {},
isEntry: false,
}

const parsedResult = cheerioElementsToArchieML(bodyContents, context)
Expand Down Expand Up @@ -60,6 +61,7 @@ it("parses a Wordpress paragraph as the first element", () => {
shouldParseWpComponents: true,
wpTagCounts: {},
htmlTagCounts: {},
isEntry: false,
})

expect(parsedResult.content).toEqual([
Expand Down
71 changes: 57 additions & 14 deletions db/model/Gdoc/htmlToEnriched.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import {
EnrichedBlockCallout,
EnrichedBlockExpandableParagraph,
EnrichedBlockGraySection,
EnrichedBlockStickyRightContainer,
} from "@ourworldindata/utils"
import { match, P } from "ts-pattern"
import {
Expand Down Expand Up @@ -298,6 +299,7 @@ interface ParseContext {
shouldParseWpComponents: boolean
htmlTagCounts: Record<string, number>
wpTagCounts: Record<string, number>
isEntry?: boolean
}

/** Regular expression to identify wordpress components in html components. These
Expand Down Expand Up @@ -448,10 +450,14 @@ export function parseWpComponent(
// tag that we want to ignore then don't try to find a closing tag
if (componentDetails.isVoidElement)
return {
result: finishWpComponent(componentDetails, {
errors: [],
content: [],
}),
result: finishWpComponent(
componentDetails,
{
errors: [],
content: [],
},
context
),
remainingElements: remainingElements,
}
if (wpComponentTagsToIgnore.includes(componentDetails.tagName))
Expand Down Expand Up @@ -487,7 +493,8 @@ export function parseWpComponent(
componentDetails,
withoutEmptyOrWhitespaceOnlyTextBlocks(
collectedChildren
)
),
context
),
remainingElements: remainingElements.slice(1),
}
Expand All @@ -511,7 +518,8 @@ export function parseWpComponent(
we create that - otherwise we keep the WpComponent around with the children content filled in */
function finishWpComponent(
details: WpComponent,
content: BlockParseResult<ArchieBlockOrWpComponent>
content: BlockParseResult<ArchieBlockOrWpComponent>,
context: ParseContext
): BlockParseResult<ArchieBlockOrWpComponent> {
return match(details.tagName)
.with("column", (): BlockParseResult<ArchieBlockOrWpComponent> => {
Expand Down Expand Up @@ -557,6 +565,17 @@ function finishWpComponent(
return { ...content, errors }
}

// For linear entries, we always want them to be a single column
if (context.isEntry) {
return {
errors,
content: convertAllWpComponentsToArchieMLBlocks([
...firstChild.childrenResults,
...secondChild.childrenResults,
]),
}
}

// If both children are empty then we don't want to create a columns block
if (
firstChild.childrenResults.length === 0 &&
Expand All @@ -567,16 +586,40 @@ function finishWpComponent(
content: [],
}
}
// Originally we had more complex logic here:
// - 1 column with content, 1 column empty -> convert to a single column
// - 2 columns with content -> keep as is
// But now we want to convert everything to be a single column, so we just extract all the blocks and stack them
// If one of the children is empty then don't create a two column layout but
// just return the non-empty child
if (firstChild.childrenResults.length === 0) {
return {
errors,
content: convertAllWpComponentsToArchieMLBlocks(
secondChild.childrenResults
),
}
}
if (secondChild.childrenResults.length === 0) {
return {
errors,
content: convertAllWpComponentsToArchieMLBlocks(
firstChild.childrenResults
),
}
}

// if both columns have content, create a sticky-right layout
return {
errors,
content: convertAllWpComponentsToArchieMLBlocks([
...firstChild.childrenResults,
...secondChild.childrenResults,
]),
content: [
{
type: "sticky-right",
left: convertAllWpComponentsToArchieMLBlocks(
firstChild.childrenResults
),
right: convertAllWpComponentsToArchieMLBlocks(
secondChild.childrenResults
),
parseErrors: [],
} as EnrichedBlockStickyRightContainer,
],
}
})
.with("owid/prominent-link", () => {
Expand Down

0 comments on commit 6744d5d

Please sign in to comment.