From 6744d5d9a296c0fca74fd002929a99c0056e2102 Mon Sep 17 00:00:00 2001 From: Ike Saunders Date: Fri, 13 Oct 2023 16:54:05 -0400 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20use=20official=20list=20of=20entry?= =?UTF-8?q?=20slugs=20for=20wp=20migration,=20conditionally=20consolidate?= =?UTF-8?q?=20columns=20for=20linear=20entries=20only?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- db/migrateWpPostsToArchieMl.ts | 157 +++++++-------------------- db/model/Gdoc/htmlToEnriched.test.ts | 2 + db/model/Gdoc/htmlToEnriched.ts | 71 +++++++++--- 3 files changed, 100 insertions(+), 130 deletions(-) diff --git a/db/migrateWpPostsToArchieMl.ts b/db/migrateWpPostsToArchieMl.ts index 6c549e6aa82..1269b944d96 100644 --- a/db/migrateWpPostsToArchieMl.ts +++ b/db/migrateWpPostsToArchieMl.ts @@ -20,135 +20,59 @@ import { } from "./model/Gdoc/htmlToEnriched.js" import { getRelatedCharts } from "./wpdb.js" -// Hard-coded slugs to avoid WP dependency -// headerMenu.json minus gdoc topic page slugs and wp topic page slugs +// slugs from all the linear entries we want to migrate from @edomt const entries = new Set([ - "population", - "population-change", "age-structure", - "gender-ratio", - "life-and-death", - "life-expectancy", - "child-mortality", - "fertility-rate", - "distribution-of-the-world-population", - "urbanization", - "health", - "health-risks", "air-pollution", - "outdoor-air-pollution", - "indoor-air-pollution", - "obesity", - "smoking", "alcohol-consumption", - "infectious-diseases", - "monkeypox", - "coronavirus", - "hiv-aids", - "malaria", - "eradication-of-diseases", - "smallpox", - "polio", - "pneumonia", - "tetanus", - "health-institutions-and-interventions", - "financing-healthcare", - "vaccination", - "life-death-health", - "maternal-mortality", - "health-meta", - "causes-of-death", "burden-of-disease", "cancer", - "environment", - "nuclear-energy", - "energy-access", - "renewable-energy", - "fossil-fuels", - "waste", - "plastic-pollution", - "air-and-climate", - "co2-and-greenhouse-gas-emissions", - "climate-change", - "water", - "clean-water-sanitation", - "water-access", - "sanitation", - "water-use-stress", - "land-and-ecosystems", - "forests-and-deforestation", - "land-use", - "natural-disasters", - "food", - "nutrition", + "child-labor", + "corruption", + "economic-inequality-by-gender", + "eradication-of-diseases", "famines", + "female-labor-supply", + "fertility-rate", + "financing-healthcare", + "fish-and-overfishing", "food-supply", + "gender-ratio", + "government-spending", + "happiness-and-life-satisfaction", + "health-meta", + "hiv-aids", + "homelessness", "human-height", - "micronutrient-deficiency", - "diet-compositions", - "food-production", + "indoor-air-pollution", + "land-use", + "literacy", + "malaria", + "marriages-and-divorces", + "maternal-mortality", "meat-production", - "agricultural-inputs", - "employment-in-agriculture", - "growth-inequality", - "public-sector", - "government-spending", + "micronutrient-deficiency", + "natural-disasters", + "nuclear-weapons", + "obesity", + "outdoor-air-pollution", + "pneumonia", + "polio", + "sanitation", + "smallpox", + "smoking", + "social-connections-and-loneliness", "taxation", - "military-personnel-spending", - "financing-education", - "poverty-and-prosperity", - "economic-inequality", - "poverty", - "economic-growth", - "economic-inequality-by-gender", - "labor", - "child-labor", - "working-hours", - "female-labor-supply", - "corruption", - "trade-migration", + "tetanus", + "time-use", "trade-and-globalization", - "tourism", - "education", - "educational-outcomes", - "global-education", - "literacy", - "pre-primary-education", - "primary-and-secondary-education", - "quality-of-education", - "tertiary-education", - "inputs-to-education", - "teachers-and-professors", - "media-education", - "technology", - "space-exploration-satellites", "transport", - "work-life", - "culture", - "trust", - "housing", - "homelessness", - "time-use", - "relationships", - "marriages-and-divorces", - "social-connections-and-loneliness", - "happiness-wellbeing", - "happiness-and-life-satisfaction", - "human-development-index", - "politics", - "human-rights", - "lgbt-rights", - "women-rights", - "democracy", - "violence-rights", - "war-peace", - "biological-and-chemical-weapons", - "war-and-peace", - "terrorism", - "nuclear-weapons", - "violence", + "urbanization", + "vaccination", "violence-against-rights-for-children", - "homicides", + "water-access", + "water-use-stress", + "working-hours", ]) const migrate = async (): Promise => { @@ -167,7 +91,7 @@ const migrate = async (): Promise => { "excerpt", "created_at_in_wordpress", "updated_at" - ).from(db.knexTable(Post.postsTable).where("id", "=", "29766")) + ).from(db.knexTable(Post.postsTable)) //.where("id", "=", "29766")) for (const post of posts) { try { @@ -188,6 +112,7 @@ const migrate = async (): Promise => { shouldParseWpComponents: true, htmlTagCounts: {}, wpTagCounts: {}, + isEntry, } const parsedResult = cheerioElementsToArchieML( bodyContents, diff --git a/db/model/Gdoc/htmlToEnriched.test.ts b/db/model/Gdoc/htmlToEnriched.test.ts index 38f4be9d498..53984ce70bc 100644 --- a/db/model/Gdoc/htmlToEnriched.test.ts +++ b/db/model/Gdoc/htmlToEnriched.test.ts @@ -23,6 +23,7 @@ it("parses a Wordpress paragraph within the content", () => { shouldParseWpComponents: true, htmlTagCounts: {}, wpTagCounts: {}, + isEntry: false, } const parsedResult = cheerioElementsToArchieML(bodyContents, context) @@ -60,6 +61,7 @@ it("parses a Wordpress paragraph as the first element", () => { shouldParseWpComponents: true, wpTagCounts: {}, htmlTagCounts: {}, + isEntry: false, }) expect(parsedResult.content).toEqual([ diff --git a/db/model/Gdoc/htmlToEnriched.ts b/db/model/Gdoc/htmlToEnriched.ts index dc3fa1d0dfb..962d292d608 100644 --- a/db/model/Gdoc/htmlToEnriched.ts +++ b/db/model/Gdoc/htmlToEnriched.ts @@ -33,6 +33,7 @@ import { EnrichedBlockCallout, EnrichedBlockExpandableParagraph, EnrichedBlockGraySection, + EnrichedBlockStickyRightContainer, } from "@ourworldindata/utils" import { match, P } from "ts-pattern" import { @@ -298,6 +299,7 @@ interface ParseContext { shouldParseWpComponents: boolean htmlTagCounts: Record wpTagCounts: Record + isEntry?: boolean } /** Regular expression to identify wordpress components in html components. These @@ -448,10 +450,14 @@ export function parseWpComponent( // tag that we want to ignore then don't try to find a closing tag if (componentDetails.isVoidElement) return { - result: finishWpComponent(componentDetails, { - errors: [], - content: [], - }), + result: finishWpComponent( + componentDetails, + { + errors: [], + content: [], + }, + context + ), remainingElements: remainingElements, } if (wpComponentTagsToIgnore.includes(componentDetails.tagName)) @@ -487,7 +493,8 @@ export function parseWpComponent( componentDetails, withoutEmptyOrWhitespaceOnlyTextBlocks( collectedChildren - ) + ), + context ), remainingElements: remainingElements.slice(1), } @@ -511,7 +518,8 @@ export function parseWpComponent( we create that - otherwise we keep the WpComponent around with the children content filled in */ function finishWpComponent( details: WpComponent, - content: BlockParseResult + content: BlockParseResult, + context: ParseContext ): BlockParseResult { return match(details.tagName) .with("column", (): BlockParseResult => { @@ -557,6 +565,17 @@ function finishWpComponent( return { ...content, errors } } + // For linear entries, we always want them to be a single column + if (context.isEntry) { + return { + errors, + content: convertAllWpComponentsToArchieMLBlocks([ + ...firstChild.childrenResults, + ...secondChild.childrenResults, + ]), + } + } + // If both children are empty then we don't want to create a columns block if ( firstChild.childrenResults.length === 0 && @@ -567,16 +586,40 @@ function finishWpComponent( content: [], } } - // Originally we had more complex logic here: - // - 1 column with content, 1 column empty -> convert to a single column - // - 2 columns with content -> keep as is - // But now we want to convert everything to be a single column, so we just extract all the blocks and stack them + // If one of the children is empty then don't create a two column layout but + // just return the non-empty child + if (firstChild.childrenResults.length === 0) { + return { + errors, + content: convertAllWpComponentsToArchieMLBlocks( + secondChild.childrenResults + ), + } + } + if (secondChild.childrenResults.length === 0) { + return { + errors, + content: convertAllWpComponentsToArchieMLBlocks( + firstChild.childrenResults + ), + } + } + + // if both columns have content, create a sticky-right layout return { errors, - content: convertAllWpComponentsToArchieMLBlocks([ - ...firstChild.childrenResults, - ...secondChild.childrenResults, - ]), + content: [ + { + type: "sticky-right", + left: convertAllWpComponentsToArchieMLBlocks( + firstChild.childrenResults + ), + right: convertAllWpComponentsToArchieMLBlocks( + secondChild.childrenResults + ), + parseErrors: [], + } as EnrichedBlockStickyRightContainer, + ], } }) .with("owid/prominent-link", () => {