From cc5515ef166313214728aba6146c42ed2b20c528 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Tue, 30 Jul 2024 19:58:19 +0200 Subject: [PATCH 01/26] =?UTF-8?q?=F0=9F=94=A8=20rename=20R2=20settings=20t?= =?UTF-8?q?o=20generic=20names?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .env.devcontainer | 6 +++--- .env.example-full | 6 +++--- db/model/Image.ts | 16 ++++++++-------- settings/serverSettings.ts | 18 ++++++++---------- site/gdocs/components/Image.tsx | 8 +++----- 5 files changed, 25 insertions(+), 29 deletions(-) diff --git a/.env.devcontainer b/.env.devcontainer index 7bc29cc6500..dc8a8bd8d54 100644 --- a/.env.devcontainer +++ b/.env.devcontainer @@ -16,8 +16,8 @@ GDOCS_CLIENT_ID='' GDOCS_BASIC_ARTICLE_TEMPLATE_URL='' GDOCS_SHARED_DRIVE_ID='' -IMAGE_HOSTING_R2_ENDPOINT='' +R2_ENDPOINT='' IMAGE_HOSTING_R2_CDN_URL='' IMAGE_HOSTING_R2_BUCKET_PATH='' -IMAGE_HOSTING_R2_ACCESS_KEY_ID='' -IMAGE_HOSTING_R2_SECRET_ACCESS_KEY='' +R2_ACCESS_KEY_ID='' +R2_SECRET_ACCESS_KEY='' diff --git a/.env.example-full b/.env.example-full index 8407d05b69b..24ad974562f 100644 --- a/.env.example-full +++ b/.env.example-full @@ -22,11 +22,11 @@ GDOCS_BASIC_ARTICLE_TEMPLATE_URL= GDOCS_SHARED_DRIVE_ID= GDOCS_DONATE_FAQS_DOCUMENT_ID= # optional -IMAGE_HOSTING_R2_ENDPOINT= # optional +R2_ENDPOINT= # optional IMAGE_HOSTING_R2_CDN_URL= IMAGE_HOSTING_R2_BUCKET_PATH= -IMAGE_HOSTING_R2_ACCESS_KEY_ID= # optional -IMAGE_HOSTING_R2_SECRET_ACCESS_KEY= # optional +R2_ACCESS_KEY_ID= # optional +R2_SECRET_ACCESS_KEY= # optional OPENAI_API_KEY= diff --git a/db/model/Image.ts b/db/model/Image.ts index 60799fc9782..7042f529eca 100644 --- a/db/model/Image.ts +++ b/db/model/Image.ts @@ -21,10 +21,10 @@ import { } from "@ourworldindata/utils" import { OwidGoogleAuth } from "../OwidGoogleAuth.js" import { - IMAGE_HOSTING_R2_ENDPOINT, - IMAGE_HOSTING_R2_ACCESS_KEY_ID, - IMAGE_HOSTING_R2_SECRET_ACCESS_KEY, - IMAGE_HOSTING_R2_REGION, + R2_ENDPOINT, + 
R2_ACCESS_KEY_ID, + R2_SECRET_ACCESS_KEY, + R2_REGION, IMAGE_HOSTING_R2_BUCKET_PATH, GDOCS_CLIENT_EMAIL, GDOCS_SHARED_DRIVE_ID, @@ -139,12 +139,12 @@ class ImageStore { export const imageStore = new ImageStore() export const s3Client = new S3Client({ - endpoint: IMAGE_HOSTING_R2_ENDPOINT, + endpoint: R2_ENDPOINT, forcePathStyle: false, - region: IMAGE_HOSTING_R2_REGION, + region: R2_REGION, credentials: { - accessKeyId: IMAGE_HOSTING_R2_ACCESS_KEY_ID, - secretAccessKey: IMAGE_HOSTING_R2_SECRET_ACCESS_KEY, + accessKeyId: R2_ACCESS_KEY_ID, + secretAccessKey: R2_SECRET_ACCESS_KEY, }, }) diff --git a/settings/serverSettings.ts b/settings/serverSettings.ts index c6f3c42cbf2..243a7b13873 100644 --- a/settings/serverSettings.ts +++ b/settings/serverSettings.ts @@ -154,22 +154,20 @@ export const IMAGE_HOSTING_R2_BUCKET_SUBFOLDER_PATH: string = IMAGE_HOSTING_R2_BUCKET_PATH.indexOf("/") + 1 ) // extract R2 credentials from rclone config as defaults -export const IMAGE_HOSTING_R2_ENDPOINT: string = - serverSettings.IMAGE_HOSTING_R2_ENDPOINT || +export const R2_ENDPOINT: string = + serverSettings.R2_ENDPOINT || rcloneConfig["owid-r2"]?.endpoint || "https://078fcdfed9955087315dd86792e71a7e.r2.cloudflarestorage.com" -export const IMAGE_HOSTING_R2_ACCESS_KEY_ID: string = - serverSettings.IMAGE_HOSTING_R2_ACCESS_KEY_ID || +export const R2_ACCESS_KEY_ID: string = + serverSettings.R2_ACCESS_KEY_ID || rcloneConfig["owid-r2"]?.access_key_id || "" -export const IMAGE_HOSTING_R2_SECRET_ACCESS_KEY: string = - serverSettings.IMAGE_HOSTING_R2_SECRET_ACCESS_KEY || +export const R2_SECRET_ACCESS_KEY: string = + serverSettings.R2_SECRET_ACCESS_KEY || rcloneConfig["owid-r2"]?.secret_access_key || "" -export const IMAGE_HOSTING_R2_REGION: string = - serverSettings.IMAGE_HOSTING_R2_REGION || - rcloneConfig["owid-r2"]?.region || - "auto" +export const R2_REGION: string = + serverSettings.R2_REGION || rcloneConfig["owid-r2"]?.region || "auto" export const DATA_API_URL: string = 
clientSettings.DATA_API_URL diff --git a/site/gdocs/components/Image.tsx b/site/gdocs/components/Image.tsx index 57e58bbb967..324b2e36f8b 100644 --- a/site/gdocs/components/Image.tsx +++ b/site/gdocs/components/Image.tsx @@ -9,8 +9,8 @@ import { import cx from "classnames" import { LIGHTBOX_IMAGE_CLASS } from "../../Lightbox.js" import { - IMAGE_HOSTING_R2_BUCKET_SUBFOLDER_PATH, - IMAGE_HOSTING_R2_CDN_URL, + R2_BUCKET_SUBFOLDER_PATH, + R2_CDN_URL, } from "../../../settings/clientSettings.js" import { DocumentContext } from "../OwidGdoc.js" import { Container } from "./ArticleBlock.js" @@ -115,9 +115,7 @@ export default function Image(props: { if (isPreviewing) { const makePreviewUrl = (f: string) => - `${IMAGE_HOSTING_R2_CDN_URL}/${IMAGE_HOSTING_R2_BUCKET_SUBFOLDER_PATH}/${encodeURIComponent( - f - )}` + `${R2_CDN_URL}/${R2_BUCKET_SUBFOLDER_PATH}/${encodeURIComponent(f)}` const PreviewSource = (props: { i?: ImageMetadata; sm?: boolean }) => { const { i, sm } = props From bc2f0412315c2a055e393950a21482f71b3ddaa0 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Thu, 1 Aug 2024 18:50:40 +0200 Subject: [PATCH 02/26] =?UTF-8?q?=F0=9F=90=9B=20undo=20accidental=20rename?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- site/gdocs/components/Image.tsx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/site/gdocs/components/Image.tsx b/site/gdocs/components/Image.tsx index 324b2e36f8b..3aceb05b773 100644 --- a/site/gdocs/components/Image.tsx +++ b/site/gdocs/components/Image.tsx @@ -9,8 +9,8 @@ import { import cx from "classnames" import { LIGHTBOX_IMAGE_CLASS } from "../../Lightbox.js" import { - R2_BUCKET_SUBFOLDER_PATH, - R2_CDN_URL, + IMAGE_HOSTING_R2_BUCKET_SUBFOLDER_PATH, + IMAGE_HOSTING_R2_CDN_URL, } from "../../../settings/clientSettings.js" import { DocumentContext } from "../OwidGdoc.js" import { Container } from "./ArticleBlock.js" @@ -115,7 +115,7 @@ export default function Image(props: { if 
(isPreviewing) { const makePreviewUrl = (f: string) => - `${R2_CDN_URL}/${R2_BUCKET_SUBFOLDER_PATH}/${encodeURIComponent(f)}` + `${IMAGE_HOSTING_R2_CDN_URL}/${IMAGE_HOSTING_R2_BUCKET_SUBFOLDER_PATH}/${encodeURIComponent(f)}` const PreviewSource = (props: { i?: ImageMetadata; sm?: boolean }) => { const { i, sm } = props From 9a61bc86747b79d5dadf8552c6e377734725aa55 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Fri, 2 Aug 2024 11:14:06 +0200 Subject: [PATCH 03/26] =?UTF-8?q?=E2=9C=A8=20add=20saving=20chart=20config?= =?UTF-8?q?s=20to=20R2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adminSiteServer/apiRouter.ts | 114 ++++++++++-- adminSiteServer/chartConfigR2Helpers.ts | 165 ++++++++++++++++++ .../1722415645057-AddChartConfigHash.ts | 23 +++ .../@ourworldindata/types/src/NominalType.ts | 8 + .../types/src/dbTypes/ChartConfigs.ts | 1 + packages/@ourworldindata/types/src/index.ts | 2 +- .../@ourworldindata/utils/src/Util.test.ts | 19 ++ packages/@ourworldindata/utils/src/Util.ts | 79 +++++++++ packages/@ourworldindata/utils/src/index.ts | 9 + settings/serverSettings.ts | 9 + 10 files changed, 410 insertions(+), 19 deletions(-) create mode 100644 adminSiteServer/chartConfigR2Helpers.ts create mode 100644 db/migration/1722415645057-AddChartConfigHash.ts diff --git a/adminSiteServer/apiRouter.ts b/adminSiteServer/apiRouter.ts index c5096c16b24..ba6c4851a55 100644 --- a/adminSiteServer/apiRouter.ts +++ b/adminSiteServer/apiRouter.ts @@ -47,6 +47,7 @@ import { checkIsPlainObjectWithGuard, mergeGrapherConfigs, diffGrapherConfigs, + getSHA1HashBase64, } from "@ourworldindata/utils" import { applyPatch } from "../adminShared/patchHelper.js" import { @@ -155,6 +156,13 @@ import { GdocDataInsight } from "../db/model/Gdoc/GdocDataInsight.js" import { GdocHomepage } from "../db/model/Gdoc/GdocHomepage.js" import { GdocAuthor } from "../db/model/Gdoc/GdocAuthor.js" import path from "path" +import { + 
deleteGrapherConfigFromR2, + deleteGrapherConfigFromR2ByUUID, + R2GrapherConfigDirectory, + saveGrapherConfigToR2, + saveGrapherConfigToR2ByUUID, +} from "./chartConfigR2Helpers.js" const apiRouter = new FunctionalRouter() @@ -275,7 +283,7 @@ const expectChartById = async ( const saveNewChart = async ( knex: db.KnexReadWriteTransaction, { config, user }: { config: GrapherInterface; user: DbPlainUser } -): Promise => { +): Promise<{ patchConfig: GrapherInterface; fullConfig: GrapherInterface }> => { // if the schema version is missing, assume it's the latest if (!config["$schema"]) { config["$schema"] = defaultGrapherConfig["$schema"] @@ -285,16 +293,25 @@ const saveNewChart = async ( const parentConfig = defaultGrapherConfig const patchConfig = diffGrapherConfigs(config, parentConfig) const fullConfig = mergeGrapherConfigs(parentConfig, patchConfig) + const fullConfigStringified = JSON.stringify(fullConfig) + + // compute a sha-1 hash of the full config + const fullConfigSha1 = await getSHA1HashBase64(fullConfigStringified) // insert patch & full configs into the chart_configs table - const configId = uuidv7() + const chartConfigId = uuidv7() await db.knexRaw( knex, `-- sql - INSERT INTO chart_configs (id, patch, full) - VALUES (?, ?, ?) + INSERT INTO chart_configs (id, patch, full, fullSha1) + VALUES (?, ?, ?, ?) `, - [configId, JSON.stringify(patchConfig), JSON.stringify(fullConfig)] + [ + chartConfigId, + JSON.stringify(patchConfig), + fullConfigStringified, + fullConfigSha1, + ] ) // add a new chart to the charts table @@ -304,7 +321,7 @@ const saveNewChart = async ( INSERT INTO charts (configId, lastEditedAt, lastEditedByUserId) VALUES (?, ?, ?) 
`, - [configId, new Date(), user.id] + [chartConfigId, new Date(), user.id] ) // The chart config itself has an id field that should store the id of the chart - update the chart now so this is true @@ -324,7 +341,9 @@ const saveNewChart = async ( [chartId, chartId, chartId] ) - return patchConfig + await saveGrapherConfigToR2ByUUID(chartConfigId, fullConfigStringified) + + return { patchConfig, fullConfig } } const updateExistingChart = async ( @@ -334,7 +353,7 @@ const updateExistingChart = async ( user, chartId, }: { config: GrapherInterface; user: DbPlainUser; chartId: number } -): Promise => { +): Promise<{ patchConfig: GrapherInterface; fullConfig: GrapherInterface }> => { // make sure that the id of the incoming config matches the chart id config.id = chartId @@ -347,19 +366,37 @@ const updateExistingChart = async ( const parentConfig = defaultGrapherConfig const patchConfig = diffGrapherConfigs(config, parentConfig) const fullConfig = mergeGrapherConfigs(parentConfig, patchConfig) + const fullConfigStringified = JSON.stringify(fullConfig) + + // compute a sha-1 hash of the full config + const fullConfigSha1 = await getSHA1HashBase64(fullConfigStringified) + + const chartConfigId = await db.knexRawFirst>( + knex, + `SELECT configId FROM charts WHERE id = ?`, + [chartId] + ) + + if (!chartConfigId) + throw new JsonError(`No chart config found for id ${chartId}`, 404) // update configs await db.knexRaw( knex, `-- sql - UPDATE chart_configs cc - JOIN charts c ON c.configId = cc.id + UPDATE chart_configs SET - cc.patch=?, - cc.full=? - WHERE c.id = ? + patch=?, + full=?, + fullSha1Base64=? + WHERE id = ? 
`, - [JSON.stringify(patchConfig), JSON.stringify(fullConfig), chartId] + [ + JSON.stringify(patchConfig), + fullConfigStringified, + fullConfigSha1, + chartConfigId.configId, + ] ) // update charts row @@ -373,7 +410,12 @@ const updateExistingChart = async ( [new Date(), user.id, chartId] ) - return patchConfig + await saveGrapherConfigToR2ByUUID( + chartConfigId.configId, + fullConfigStringified + ) + + return { patchConfig, fullConfig } } const saveGrapher = async ( @@ -443,6 +485,11 @@ const saveGrapher = async ( `INSERT INTO chart_slug_redirects (chart_id, slug) VALUES (?, ?)`, [existingConfig.id, existingConfig.slug] ) + // When we rename grapher configs, make sure to delete the old one (the new one will be saved below) + await deleteGrapherConfigFromR2( + R2GrapherConfigDirectory.publishedGrapherBySlug, + `${existingConfig.slug}.json` + ) } } @@ -457,20 +504,27 @@ const saveGrapher = async ( // Execute the actual database update or creation let chartId: number + let patchConfig: GrapherInterface + let fullConfig: GrapherInterface if (existingConfig) { chartId = existingConfig.id! - newConfig = await updateExistingChart(knex, { + const configs = await updateExistingChart(knex, { config: newConfig, user, chartId, }) + patchConfig = configs.patchConfig + fullConfig = configs.fullConfig } else { - newConfig = await saveNewChart(knex, { + const configs = await saveNewChart(knex, { config: newConfig, user, }) chartId = newConfig.id! 
+ patchConfig = configs.patchConfig + fullConfig = configs.fullConfig } + newConfig = patchConfig // Record this change in version history const chartRevisionLog = { @@ -515,6 +569,17 @@ const saveGrapher = async ( newDimensions.map((d) => d.variableId) ) + if (newConfig.isPublished) { + const configStringified = JSON.stringify(fullConfig) + const configSha1 = await getSHA1HashBase64(configStringified) + await saveGrapherConfigToR2( + configStringified, + R2GrapherConfigDirectory.publishedGrapherBySlug, + `${newConfig.slug}.json`, + configSha1 + ) + } + if ( newConfig.isPublished && (!existingConfig || !existingConfig.isPublished) @@ -537,6 +602,10 @@ const saveGrapher = async ( `DELETE FROM chart_slug_redirects WHERE chart_id = ?`, [existingConfig.id] ) + await deleteGrapherConfigFromR2( + R2GrapherConfigDirectory.publishedGrapherBySlug, + `${existingConfig.slug}.json` + ) await triggerStaticBuild(user, `Unpublishing chart ${newConfig.slug}`) } else if (newConfig.isPublished) await triggerStaticBuild(user, `Updating chart ${newConfig.slug}`) @@ -883,11 +952,13 @@ deleteRouteWithRWTransaction( [chart.id] ) - const row = await db.knexRawFirst<{ configId: number }>( + const row = await db.knexRawFirst>( trx, `SELECT configId FROM charts WHERE id = ?`, [chart.id] ) + if (!row) + throw new JsonError(`No chart config found for id ${chart.id}`, 404) if (row) { await db.knexRaw(trx, `DELETE FROM charts WHERE id=?`, [chart.id]) await db.knexRaw(trx, `DELETE FROM chart_configs WHERE id=?`, [ @@ -901,6 +972,13 @@ deleteRouteWithRWTransaction( `Deleting chart ${chart.slug}` ) + await deleteGrapherConfigFromR2ByUUID(row.configId) + if (chart.isPublished) + await deleteGrapherConfigFromR2( + R2GrapherConfigDirectory.publishedGrapherBySlug, + `${chart.slug}.json` + ) + return { success: true } } ) diff --git a/adminSiteServer/chartConfigR2Helpers.ts b/adminSiteServer/chartConfigR2Helpers.ts new file mode 100644 index 00000000000..fe6c34ffc7a --- /dev/null +++ 
b/adminSiteServer/chartConfigR2Helpers.ts @@ -0,0 +1,165 @@ +import { + GRAPHER_CONFIG_R2_BUCKET, + GRAPHER_CONFIG_R2_BUCKET_PATH, + R2_ACCESS_KEY_ID, + R2_ENDPOINT, + R2_REGION, + R2_SECRET_ACCESS_KEY, +} from "../settings/serverSettings.js" +import { + DeleteObjectCommand, + DeleteObjectCommandInput, + PutObjectCommand, + PutObjectCommandInput, + S3Client, +} from "@aws-sdk/client-s3" +import { + Base64String, + excludeUndefined, + getSHA1HashBase64, + JsonError, +} from "@ourworldindata/utils" +import { logErrorAndMaybeSendToBugsnag } from "../serverUtils/errorLog.js" + +export enum R2GrapherConfigDirectory { + byUUID = "config/by-uuid", + publishedGrapherBySlug = "grapher/by-slug", +} + +let s3Client: S3Client | undefined = undefined + +export async function saveGrapherConfigToR2ByUUID( + id: string, + chartConfigStringified: string +) { + const configSha1 = await getSHA1HashBase64(chartConfigStringified) + + await saveGrapherConfigToR2( + chartConfigStringified, + R2GrapherConfigDirectory.byUUID, + `${id}.json`, + configSha1 + ) +} + +export async function deleteGrapherConfigFromR2ByUUID(id: string) { + await deleteGrapherConfigFromR2( + R2GrapherConfigDirectory.byUUID, + `${id}.json` + ) +} + +export async function saveGrapherConfigToR2( + config_stringified: string, + directory: R2GrapherConfigDirectory, + filename: string, + config_sha1_hash: Base64String +) { + if ( + GRAPHER_CONFIG_R2_BUCKET === undefined || + GRAPHER_CONFIG_R2_BUCKET_PATH === undefined + ) { + console.info( + "R2 bucket not configured, not storing grapher config to R2" + ) + return + } + try { + if (!s3Client) { + s3Client = new S3Client({ + endpoint: R2_ENDPOINT, + forcePathStyle: false, + region: R2_REGION, + credentials: { + accessKeyId: R2_ACCESS_KEY_ID, + secretAccessKey: R2_SECRET_ACCESS_KEY, + }, + }) + } + + if (!GRAPHER_CONFIG_R2_BUCKET || !GRAPHER_CONFIG_R2_BUCKET_PATH) { + throw new Error("R2 bucket not configured") + } + + const bucket = GRAPHER_CONFIG_R2_BUCKET + // On prod, 
GRAPHER_CONFIG_R2_BUCKET_PATH might be an empty string and in this case we need to exclude it + const path = excludeUndefined([ + GRAPHER_CONFIG_R2_BUCKET_PATH, + directory, + ]).join("/") + + const MIMEType = "application/json" + + const params: PutObjectCommandInput = { + Bucket: bucket, + Key: `${path}/${filename}`, + Body: config_stringified, + ContentType: MIMEType, + ChecksumSHA1: config_sha1_hash, + } + + await s3Client.send(new PutObjectCommand(params)) + console.log( + `Successfully uploaded object: ${params.Bucket}/${params.Key}` + ) + } catch (err) { + await logErrorAndMaybeSendToBugsnag(err) + throw new JsonError( + `Failed to save the grapher config to R2. Inner error: ${err}` + ) + } +} + +export async function deleteGrapherConfigFromR2( + directory: R2GrapherConfigDirectory, + filename: string +) { + if ( + GRAPHER_CONFIG_R2_BUCKET === undefined || + GRAPHER_CONFIG_R2_BUCKET_PATH === undefined + ) { + console.info( + "R2 bucket not configured, not deleting grapher config to R2" + ) + return + } + try { + if (!s3Client) { + s3Client = new S3Client({ + endpoint: R2_ENDPOINT, + forcePathStyle: false, + region: R2_REGION, + credentials: { + accessKeyId: R2_ACCESS_KEY_ID, + secretAccessKey: R2_SECRET_ACCESS_KEY, + }, + }) + } + + if (!GRAPHER_CONFIG_R2_BUCKET || !GRAPHER_CONFIG_R2_BUCKET_PATH) { + throw new Error("R2 bucket not configured") + } + + const bucket = GRAPHER_CONFIG_R2_BUCKET + // On prod, GRAPHER_CONFIG_R2_BUCKET_PATH might be an empty string and in this case we need to exclude it + const path = excludeUndefined([ + GRAPHER_CONFIG_R2_BUCKET_PATH, + directory, + ]).join("/") + + const params: DeleteObjectCommandInput = { + Bucket: bucket, + Key: `${path}/${filename}`, + } + + await s3Client.send(new DeleteObjectCommand(params)) + console.log( + `Successfully deleted object: ${params.Bucket}/${params.Key}` + ) + } catch (err) { + await logErrorAndMaybeSendToBugsnag(err) + throw new JsonError( + `Failed to delete the grapher config to R2 at 
${directory}/${filename}. Inner error: ${err}` + ) + } +} diff --git a/db/migration/1722415645057-AddChartConfigHash.ts b/db/migration/1722415645057-AddChartConfigHash.ts new file mode 100644 index 00000000000..96c8588a45a --- /dev/null +++ b/db/migration/1722415645057-AddChartConfigHash.ts @@ -0,0 +1,23 @@ +import { MigrationInterface, QueryRunner } from "typeorm" + +export class AddChartConfigHash1722415645057 implements MigrationInterface { + public async up(queryRunner: QueryRunner): Promise { + // alter the chart_configs table and add a column for a sha-1 has of the full config + await queryRunner.query(` + ALTER TABLE chart_configs + ADD COLUMN fullSha1Base64 CHAR(28); + `) + + await queryRunner.query(` + UPDATE chart_configs + SET fullSha1Base64 = to_base64(unhex(SHA1(full))) + `) + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.query(` + ALTER TABLE chart_configs + DROP COLUMN fullSha1Base64; + `) + } +} diff --git a/packages/@ourworldindata/types/src/NominalType.ts b/packages/@ourworldindata/types/src/NominalType.ts index f3487f54232..f24497dfb29 100644 --- a/packages/@ourworldindata/types/src/NominalType.ts +++ b/packages/@ourworldindata/types/src/NominalType.ts @@ -20,3 +20,11 @@ declare const __nominal__type: unique symbol export type Nominal = Type & { readonly [__nominal__type]: Identifier } + +export function wrap(obj: T): Nominal { + return obj as Nominal +} + +export function unwrap(obj: Nominal): T { + return obj +} diff --git a/packages/@ourworldindata/types/src/dbTypes/ChartConfigs.ts b/packages/@ourworldindata/types/src/dbTypes/ChartConfigs.ts index 24c98ee6b6f..cdc791dfc21 100644 --- a/packages/@ourworldindata/types/src/dbTypes/ChartConfigs.ts +++ b/packages/@ourworldindata/types/src/dbTypes/ChartConfigs.ts @@ -6,6 +6,7 @@ export interface DbInsertChartConfig { id: string patch: JsonString full: JsonString + fullSha1Base64?: string slug?: string | null createdAt?: Date updatedAt?: Date | null diff --git 
a/packages/@ourworldindata/types/src/index.ts b/packages/@ourworldindata/types/src/index.ts index 3ca08233205..3c8e8af77d0 100644 --- a/packages/@ourworldindata/types/src/index.ts +++ b/packages/@ourworldindata/types/src/index.ts @@ -646,7 +646,7 @@ export { export { RedirectCode, type DbPlainRedirect } from "./dbTypes/Redirects.js" -export type { Nominal } from "./NominalType.js" +export { type Nominal, wrap, unwrap } from "./NominalType.js" export { type DbRawLatestWork, diff --git a/packages/@ourworldindata/utils/src/Util.test.ts b/packages/@ourworldindata/utils/src/Util.test.ts index c1cd463fac5..efc593dbfc0 100755 --- a/packages/@ourworldindata/utils/src/Util.test.ts +++ b/packages/@ourworldindata/utils/src/Util.test.ts @@ -29,6 +29,11 @@ import { traverseEnrichedBlock, cartesian, formatInlineList, + getSHA1HashBytes, + bytesToBase64, + base64ToBytes, + bytesToHex, + hexToBytes, } from "./Util.js" import { BlockImageSize, @@ -795,3 +800,17 @@ describe(formatInlineList, () => { ) }) }) + +// a test to see if getSHA1HashBytes encoded to base64 and back to bytes is the same as the original bytes +describe("getSHA1HashBytes going back and forth through base64 and hex yields identical results", () => { + it("hashes a string and decodes it back to the same bytes", async () => { + const bytes = await getSHA1HashBytes("Hello World") + const base64 = bytesToBase64(bytes) + const decodedBytes = base64ToBytes(base64) + expect(decodedBytes).toEqual(bytes) + + const hex = bytesToHex(bytes) + const decodedBytes2 = hexToBytes(hex) + expect(decodedBytes2).toEqual(bytes) + }) +}) diff --git a/packages/@ourworldindata/utils/src/Util.ts b/packages/@ourworldindata/utils/src/Util.ts index 93f0aa0289e..f98072e531e 100644 --- a/packages/@ourworldindata/utils/src/Util.ts +++ b/packages/@ourworldindata/utils/src/Util.ts @@ -174,10 +174,28 @@ import { TagGraphRoot, TagGraphRootName, TagGraphNode, + Nominal, } from "@ourworldindata/types" import { PointVector } from "./PointVector.js" 
import React from "react" import { match, P } from "ts-pattern" +// import "crypto" + +let subtleCrypto : any + +if (typeof globalThis.crypto !== "undefined" && globalThis.crypto.subtle) { + // Browsers and Cloudflare Workers + subtleCrypto = globalThis.crypto.subtle +} else { + // Node.js 18+ using the built-in webcrypto module + import("node:crypto") + .then(({ webcrypto }) => { + subtleCrypto = webcrypto.subtle + }) + .catch((err) => { + console.error("Failed to import crypto module:", err) + }) +} export type NoUndefinedValues = { [P in keyof T]: Required> @@ -454,6 +472,67 @@ export const cagr = ( ) } +/** Compute a SHA1 hash for a given string + */ +export async function getSHA1HashBytes(data: string): Promise { + const encoder = new TextEncoder() + const dataBuffer = encoder.encode(data) + const hashBuffer = await subtleCrypto.digest("SHA-1", dataBuffer) + + return new Uint8Array(hashBuffer) +} + +export type Base64String = Nominal +export type HexString = Nominal + +export function base64ToBytes(base64: Base64String): Uint8Array { + const binString = atob(base64) + return Uint8Array.from(binString, (m) => { + const cp = m.codePointAt(0) + if (cp === undefined) throw new Error("Invalid base64") + return cp + }) +} + +export function bytesToBase64(bytes: Uint8Array): Base64String { + const binString = Array.from(bytes, (byte) => + String.fromCodePoint(byte) + ).join("") + return btoa(binString) as Base64String +} + +export function hexToBytes(hex: string): Uint8Array { + if (hex.length % 2 !== 0) throw new Error("Invalid hex") + const bytes = new Uint8Array(hex.length / 2) + for (let i = 0; i < hex.length; i += 2) { + const parsed = parseInt(hex.slice(i, i + 2), 16) + if (isNaN(parsed)) throw new Error("Invalid hex") + bytes[i / 2] = parsed + } + return bytes +} + +export function bytesToHex(bytes: Uint8Array): HexString { + return Array.from(bytes) + .map((byte) => byte.toString(16).padStart(2, "0")) + .join("") as HexString +} + +/** Compute a SHA1 hash for 
a given string and return it as a string HEX encoded + */ +export async function getSHA1HashHex(data: string): Promise { + const hashBuffer = await getSHA1HashBytes(data) + return bytesToHex(hashBuffer) +} + +/** Compute a SHA1 hash for a given string and return it as a string BASE64 encoded + */ +export async function getSHA1HashBase64(data: string): Promise { + const bytes = await getSHA1HashBytes(data) + const base64 = bytesToBase64(bytes) + return base64 +} + export const makeAnnotationsSlug = (columnSlug: string): string => `${columnSlug}-annotations` diff --git a/packages/@ourworldindata/utils/src/index.ts b/packages/@ourworldindata/utils/src/index.ts index bad8f5efa17..d0066888c30 100644 --- a/packages/@ourworldindata/utils/src/index.ts +++ b/packages/@ourworldindata/utils/src/index.ts @@ -20,6 +20,15 @@ export { firstOfNonEmptyArray, lastOfNonEmptyArray, mapToObjectLiteral, + type Base64String, + type HexString, + getSHA1HashBytes, + getSHA1HashHex, + getSHA1HashBase64, + bytesToBase64, + base64ToBytes, + bytesToHex, + hexToBytes, next, previous, domainExtent, diff --git a/settings/serverSettings.ts b/settings/serverSettings.ts index 243a7b13873..d945b49173c 100644 --- a/settings/serverSettings.ts +++ b/settings/serverSettings.ts @@ -169,6 +169,15 @@ export const R2_SECRET_ACCESS_KEY: string = export const R2_REGION: string = serverSettings.R2_REGION || rcloneConfig["owid-r2"]?.region || "auto" +export const GRAPHER_CONFIG_BASE_URL: string = + serverSettings.GRAPHER_CONFIG_BASE_URL || + "https://ourworldindata.org/grapher/" + +export const GRAPHER_CONFIG_R2_BUCKET: string | undefined = + serverSettings.GRAPHER_CONFIG_R2_BUCKET +export const GRAPHER_CONFIG_R2_BUCKET_PATH: string | undefined = + serverSettings.GRAPHER_CONFIG_R2_BUCKET_PATH + export const DATA_API_URL: string = clientSettings.DATA_API_URL export const BUILDKITE_API_ACCESS_TOKEN: string = From 73901f0c3f62c3bb37c093f0bbb6ee5caebf5bf1 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Mon, 5 
Aug 2024 09:44:23 +0200 Subject: [PATCH 04/26] =?UTF-8?q?=F0=9F=9A=A7=20WIP=20tool=20to=20sync=20g?= =?UTF-8?q?rapher=20configs=20to=20R2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- devTools/syncGraphersToR2/syncGraphersToR2.ts | 105 ++++++++++++++++++ devTools/syncGraphersToR2/tsconfig.json | 18 +++ package.json | 3 +- tsconfig.json | 3 + 4 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 devTools/syncGraphersToR2/syncGraphersToR2.ts create mode 100644 devTools/syncGraphersToR2/tsconfig.json diff --git a/devTools/syncGraphersToR2/syncGraphersToR2.ts b/devTools/syncGraphersToR2/syncGraphersToR2.ts new file mode 100644 index 00000000000..a2f95c93dad --- /dev/null +++ b/devTools/syncGraphersToR2/syncGraphersToR2.ts @@ -0,0 +1,105 @@ +import fs from "fs-extra" +import parseArgs from "minimist" +import { + DeleteObjectCommand, + DeleteObjectCommandInput, + ListObjectsCommand, + ListObjectsV2Command, + ListObjectsV2CommandOutput, + PutObjectCommand, + PutObjectCommandInput, + S3Client, +} from "@aws-sdk/client-s3" +import { + GRAPHER_CONFIG_R2_BUCKET, + GRAPHER_CONFIG_R2_BUCKET_PATH, + R2_ACCESS_KEY_ID, + R2_ENDPOINT, + R2_REGION, + R2_SECRET_ACCESS_KEY, +} from "../../settings/serverSettings.js" +import { knexRaw, knexReadonlyTransaction } from "../../db/db.js" +import { R2GrapherConfigDirectory } from "../../adminSiteServer/chartConfigR2Helpers.js" +import { DbRawChartConfig, excludeUndefined } from "@ourworldindata/utils" +import { string } from "ts-pattern/dist/patterns.js" +import { take } from "lodash" + +async function main(parsedArgs: parseArgs.ParsedArgs) { + if ( + GRAPHER_CONFIG_R2_BUCKET === undefined || + GRAPHER_CONFIG_R2_BUCKET_PATH === undefined + ) { + console.info("R2 bucket not configured, exiting") + return + } + + const s3Client = new S3Client({ + endpoint: R2_ENDPOINT, + forcePathStyle: false, + region: R2_REGION, + credentials: { + accessKeyId: R2_ACCESS_KEY_ID, + 
secretAccessKey: R2_SECRET_ACCESS_KEY, + }, + }) + + await knexReadonlyTransaction(async (trx) => { + const slugsAndHashesFromDb = await knexRaw< + Pick + >( + trx, + `select slug, fullSha1Base64 from chart_configs where slug is not null` + ) + const hashesOfFilesToToUpsert = new Map() + const path = excludeUndefined([ + GRAPHER_CONFIG_R2_BUCKET_PATH, + R2GrapherConfigDirectory.publishedGrapherBySlug, + ]).join("/") + + slugsAndHashesFromDb.forEach((row) => { + hashesOfFilesToToUpsert.set( + `${path}/${row.slug}.json`, + row.fullSha1Base64 + ) + }) + + const hashesOfFilesToDelete = new Map() + + // list the files in the R2 bucket. There may be more files in the + // bucket than can be returned in one list operation so loop until + // all files are listed + let continuationToken: string | undefined = undefined + do { + const listObjectsCommandInput = { + Bucket: GRAPHER_CONFIG_R2_BUCKET, + Prefix: path, + ContinuationToken: continuationToken, + } + const listObjectsCommandOutput: ListObjectsV2CommandOutput = + await s3Client.send( + new ListObjectsV2Command(listObjectsCommandInput) + ) + console.log( + "Got next batch of objects", + listObjectsCommandOutput.Contents + ) + if (listObjectsCommandOutput.Contents) { + listObjectsCommandOutput.Contents.forEach((object) => { + if (object.Key && object.ETag) { + hashesOfFilesToDelete.set(object.Key, object.ETag) + } + }) + } + continuationToken = listObjectsCommandOutput.NextContinuationToken + } while (continuationToken) + + console.log("10 entries ", take(hashesOfFilesToDelete.entries(), 10)) + }) +} + +const parsedArgs = parseArgs(process.argv.slice(2)) +if (parsedArgs["h"]) { + console.log(`syncGraphersToR2.js - sync graphers to R2`) +} else { + main(parsedArgs) +} diff --git a/devTools/syncGraphersToR2/tsconfig.json b/devTools/syncGraphersToR2/tsconfig.json new file mode 100644 index 00000000000..69afea46ff9 --- /dev/null +++ b/devTools/syncGraphersToR2/tsconfig.json @@ -0,0 +1,18 @@ +{ + "extends": 
"../tsconfigs/tsconfig.base.json", + "compilerOptions": { + "outDir": "../../itsJustJavascript/devTools/syncGrapherToR2", + "rootDir": "." + }, + "references": [ + { + "path": "../../db" + }, + { + "path": "../../adminSiteServer" + }, + { + "path": "../../settings" + }, + ] +} diff --git a/package.json b/package.json index 8ab030f444c..20f8e503d94 100644 --- a/package.json +++ b/package.json @@ -39,7 +39,8 @@ "testPrettierAll": "yarn prettier --check \"**/*.{tsx,ts,jsx,js,json,md,html,css,scss,yml}\"", "testJest": "lerna run buildTests && jest", "testSiteNavigation": "tsx --tsconfig tsconfig.tsx.json devTools/navigationTest/navigationTest.ts", - "generateDbTypes": "npx @rmp135/sql-ts -c db/sql-ts/sql-ts-config.json" + "generateDbTypes": "npx @rmp135/sql-ts -c db/sql-ts/sql-ts-config.json", + "syncGraphersToR2": "tsx --tsconfig tsconfig.tsx.json devTools/syncGraphersToR2/syncGraphersToR2.ts" }, "dependencies": { "@algolia/autocomplete-js": "^1.17.2", diff --git a/tsconfig.json b/tsconfig.json index 94bbeed9aae..ae863d527a2 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -54,6 +54,9 @@ }, { "path": "./devTools/navigationTest" + }, + { + "path": "./devTools/syncGraphersToR2" } ] } From 93703ddf0f4c63d07c71503cf99544c0b4ef47d3 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Mon, 5 Aug 2024 13:53:19 +0200 Subject: [PATCH 05/26] =?UTF-8?q?=F0=9F=94=A8=20switch=20from=20sha1=20to?= =?UTF-8?q?=20md5=20and=20implement=20remaining=20sync=20logic?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adminSiteServer/apiRouter.ts | 19 +- adminSiteServer/chartConfigR2Helpers.ts | 21 +- .../1722415645057-AddChartConfigHash.ts | 7 +- devTools/syncGraphersToR2/syncGraphersToR2.ts | 204 ++++++++++++++---- .../types/src/dbTypes/ChartConfigs.ts | 2 +- packages/@ourworldindata/utils/src/Util.ts | 23 +- packages/@ourworldindata/utils/src/index.ts | 2 + 7 files changed, 205 insertions(+), 73 deletions(-) diff --git 
a/adminSiteServer/apiRouter.ts b/adminSiteServer/apiRouter.ts index ba6c4851a55..86f6215a9e5 100644 --- a/adminSiteServer/apiRouter.ts +++ b/adminSiteServer/apiRouter.ts @@ -47,7 +47,6 @@ import { checkIsPlainObjectWithGuard, mergeGrapherConfigs, diffGrapherConfigs, - getSHA1HashBase64, } from "@ourworldindata/utils" import { applyPatch } from "../adminShared/patchHelper.js" import { @@ -162,6 +161,7 @@ import { R2GrapherConfigDirectory, saveGrapherConfigToR2, saveGrapherConfigToR2ByUUID, + getMd5HashBase64, } from "./chartConfigR2Helpers.js" const apiRouter = new FunctionalRouter() @@ -296,21 +296,21 @@ const saveNewChart = async ( const fullConfigStringified = JSON.stringify(fullConfig) // compute a sha-1 hash of the full config - const fullConfigSha1 = await getSHA1HashBase64(fullConfigStringified) + const fullConfigMd5 = await getMd5HashBase64(fullConfigStringified) // insert patch & full configs into the chart_configs table const chartConfigId = uuidv7() await db.knexRaw( knex, `-- sql - INSERT INTO chart_configs (id, patch, full, fullSha1) + INSERT INTO chart_configs (id, patch, full, fullMd5) VALUES (?, ?, ?, ?) `, [ chartConfigId, JSON.stringify(patchConfig), fullConfigStringified, - fullConfigSha1, + fullConfigMd5, ] ) @@ -368,8 +368,7 @@ const updateExistingChart = async ( const fullConfig = mergeGrapherConfigs(parentConfig, patchConfig) const fullConfigStringified = JSON.stringify(fullConfig) - // compute a sha-1 hash of the full config - const fullConfigSha1 = await getSHA1HashBase64(fullConfigStringified) + const fullConfigMd5 = await getMd5HashBase64(fullConfigStringified) const chartConfigId = await db.knexRawFirst>( knex, @@ -388,13 +387,13 @@ const updateExistingChart = async ( SET patch=?, full=?, - fullSha1Base64=? + fullMd5=? WHERE id = ? 
`, [ JSON.stringify(patchConfig), fullConfigStringified, - fullConfigSha1, + fullConfigMd5, chartConfigId.configId, ] ) @@ -571,12 +570,12 @@ const saveGrapher = async ( if (newConfig.isPublished) { const configStringified = JSON.stringify(fullConfig) - const configSha1 = await getSHA1HashBase64(configStringified) + const configMd5 = await getMd5HashBase64(configStringified) await saveGrapherConfigToR2( configStringified, R2GrapherConfigDirectory.publishedGrapherBySlug, `${newConfig.slug}.json`, - configSha1 + configMd5 ) } diff --git a/adminSiteServer/chartConfigR2Helpers.ts b/adminSiteServer/chartConfigR2Helpers.ts index fe6c34ffc7a..219c7b8914b 100644 --- a/adminSiteServer/chartConfigR2Helpers.ts +++ b/adminSiteServer/chartConfigR2Helpers.ts @@ -16,11 +16,20 @@ import { import { Base64String, excludeUndefined, - getSHA1HashBase64, JsonError, + getMd5HashBase64, } from "@ourworldindata/utils" import { logErrorAndMaybeSendToBugsnag } from "../serverUtils/errorLog.js" - +import { createHash } from "crypto" + +export function getMd5HashBase64(data: string): Base64String { + // I would have liked to create a function in utils that can compute a varienty of hashes + // in both the browser, CF workers and node but unfortunately this isn't easily possible + // for md5 - so here we just special case for md5, node and base64 encoding for now. 
+ return createHash("md5") + .update(data, "utf-8") + .digest("base64") as Base64String +} export enum R2GrapherConfigDirectory { byUUID = "config/by-uuid", publishedGrapherBySlug = "grapher/by-slug", @@ -32,13 +41,13 @@ export async function saveGrapherConfigToR2ByUUID( id: string, chartConfigStringified: string ) { - const configSha1 = await getSHA1HashBase64(chartConfigStringified) + const configMd5 = await getMd5HashBase64(chartConfigStringified) await saveGrapherConfigToR2( chartConfigStringified, R2GrapherConfigDirectory.byUUID, `${id}.json`, - configSha1 + configMd5 ) } @@ -53,7 +62,7 @@ export async function saveGrapherConfigToR2( config_stringified: string, directory: R2GrapherConfigDirectory, filename: string, - config_sha1_hash: Base64String + configMd5: Base64String ) { if ( GRAPHER_CONFIG_R2_BUCKET === undefined || @@ -95,7 +104,7 @@ export async function saveGrapherConfigToR2( Key: `${path}/${filename}`, Body: config_stringified, ContentType: MIMEType, - ChecksumSHA1: config_sha1_hash, + ContentMD5: configMd5, } await s3Client.send(new PutObjectCommand(params)) diff --git a/db/migration/1722415645057-AddChartConfigHash.ts b/db/migration/1722415645057-AddChartConfigHash.ts index 96c8588a45a..8885900a088 100644 --- a/db/migration/1722415645057-AddChartConfigHash.ts +++ b/db/migration/1722415645057-AddChartConfigHash.ts @@ -2,22 +2,21 @@ import { MigrationInterface, QueryRunner } from "typeorm" export class AddChartConfigHash1722415645057 implements MigrationInterface { public async up(queryRunner: QueryRunner): Promise { - // alter the chart_configs table and add a column for a sha-1 has of the full config await queryRunner.query(` ALTER TABLE chart_configs - ADD COLUMN fullSha1Base64 CHAR(28); + ADD COLUMN fullMd5 CHAR(24); `) await queryRunner.query(` UPDATE chart_configs - SET fullSha1Base64 = to_base64(unhex(SHA1(full))) + SET fullMd5 = to_base64(unhex(md5(full))) `) } public async down(queryRunner: QueryRunner): Promise { await queryRunner.query(` 
ALTER TABLE chart_configs - DROP COLUMN fullSha1Base64; + DROP COLUMN fullMd5; `) } } diff --git a/devTools/syncGraphersToR2/syncGraphersToR2.ts b/devTools/syncGraphersToR2/syncGraphersToR2.ts index a2f95c93dad..e4abd461c23 100644 --- a/devTools/syncGraphersToR2/syncGraphersToR2.ts +++ b/devTools/syncGraphersToR2/syncGraphersToR2.ts @@ -18,13 +18,121 @@ import { R2_REGION, R2_SECRET_ACCESS_KEY, } from "../../settings/serverSettings.js" -import { knexRaw, knexReadonlyTransaction } from "../../db/db.js" +import { + knexRaw, + KnexReadonlyTransaction, + knexReadonlyTransaction, +} from "../../db/db.js" import { R2GrapherConfigDirectory } from "../../adminSiteServer/chartConfigR2Helpers.js" -import { DbRawChartConfig, excludeUndefined } from "@ourworldindata/utils" +import { + base64ToBytes, + bytesToBase64, + DbRawChartConfig, + differenceOfSets, + excludeUndefined, + HexString, + hexToBytes, +} from "@ourworldindata/utils" import { string } from "ts-pattern/dist/patterns.js" -import { take } from "lodash" +import { chunk, take } from "lodash" + +type HashAndId = Pick + +async function syncWithR2( + s3Client: S3Client, + pathPrefix: string, + hashesOfFilesToToUpsert: Map, + trx: KnexReadonlyTransaction, + dryRun: boolean = false +) { + const hashesOfFilesToDelete = new Map() + + // list the files in the R2 bucket. There may be more files in the + // bucket than can be returned in one list operation so loop until + // all files are listed + let continuationToken: string | undefined = undefined + do { + const listObjectsCommandInput = { + Bucket: GRAPHER_CONFIG_R2_BUCKET, + Prefix: pathPrefix, + ContinuationToken: continuationToken, + } + const listObjectsCommandOutput: ListObjectsV2CommandOutput = + await s3Client.send( + new ListObjectsV2Command(listObjectsCommandInput) + ) + if ((listObjectsCommandOutput.Contents?.length ?? 
0) > 0) { + listObjectsCommandOutput.Contents!.forEach((object) => { + if (object.Key && object.ETag) { + // For some reason the etag has quotes around it, strip those + const md5 = object.ETag.replace(/"/g, "") as HexString + const md5Base64 = bytesToBase64(hexToBytes(md5)) + + if ( + hashesOfFilesToToUpsert.has(object.Key) && + hashesOfFilesToToUpsert.get(object.Key)?.fullMd5 === + md5Base64 + ) { + hashesOfFilesToToUpsert.delete(object.Key) + } else { + hashesOfFilesToDelete.set(object.Key, md5Base64) + } + } + }) + } + continuationToken = listObjectsCommandOutput.NextContinuationToken + } while (continuationToken) + + console.log("Number of files to upsert", hashesOfFilesToToUpsert.size) + console.log("Number of files to delete", hashesOfFilesToDelete.size) + + for (const [key, _] of hashesOfFilesToDelete.entries()) { + const deleteObjectCommandInput: DeleteObjectCommandInput = { + Bucket: GRAPHER_CONFIG_R2_BUCKET, + Key: key, + } + if (!dryRun) + await s3Client.send( + new DeleteObjectCommand(deleteObjectCommandInput) + ) + else console.log("Would have deleted", key) + } + + // Chunk the inserts so that we don't need to keep all the full configs in memory + for (const batch of chunk([...hashesOfFilesToToUpsert.entries()], 100)) { + const fullConfigs = await knexRaw< + Pick + >(trx, `select id, full from chart_configs where id in (?)`, [ + batch.map((entry) => entry[1].id), + ]) + const fullConfigMap = new Map( + fullConfigs.map(({ id, full }) => [id, full]) + ) + const uploadPromises = batch.map(async ([key, val]) => { + const id = val.id + const fullMd5 = val.fullMd5 + const full = fullConfigMap.get(id) + if (full === undefined) { + console.error(`Full config not found for id ${id}`) + return + } + const putObjectCommandInput: PutObjectCommandInput = { + Bucket: GRAPHER_CONFIG_R2_BUCKET, + Key: key, + Body: full, + ContentMD5: fullMd5, + } + if (!dryRun) + return s3Client.send( + new PutObjectCommand(putObjectCommandInput) + ) + else console.log("Would have 
upserted", key) + }) + await Promise.all(uploadPromises) + } +} -async function main(parsedArgs: parseArgs.ParsedArgs) { +async function main(parsedArgs: parseArgs.ParsedArgs, dryRun: boolean) { if ( GRAPHER_CONFIG_R2_BUCKET === undefined || GRAPHER_CONFIG_R2_BUCKET_PATH === undefined @@ -43,57 +151,67 @@ async function main(parsedArgs: parseArgs.ParsedArgs) { }, }) + const hashesOfFilesToToUpsertBySlug = new Map() + const hashesOfFilesToToUpsertByUuid = new Map() + const pathPrefixBySlug = excludeUndefined([ + GRAPHER_CONFIG_R2_BUCKET_PATH, + R2GrapherConfigDirectory.publishedGrapherBySlug, + ]).join("/") + + const pathPrefixByUuid = excludeUndefined([ + GRAPHER_CONFIG_R2_BUCKET_PATH, + R2GrapherConfigDirectory.byUUID, + ]).join("/") + await knexReadonlyTransaction(async (trx) => { + // Ensure that the published charts exist by slug const slugsAndHashesFromDb = await knexRaw< - Pick + Pick >( trx, - `select slug, fullSha1Base64 from chart_configs where slug is not null` + `select slug, fullMd5, id from chart_configs where slug is not null` ) - const hashesOfFilesToToUpsert = new Map() - const path = excludeUndefined([ - GRAPHER_CONFIG_R2_BUCKET_PATH, - R2GrapherConfigDirectory.publishedGrapherBySlug, - ]).join("/") slugsAndHashesFromDb.forEach((row) => { - hashesOfFilesToToUpsert.set( - `${path}/${row.slug}.json`, - row.fullSha1Base64 + hashesOfFilesToToUpsertBySlug.set( + `${pathPrefixBySlug}/${row.slug}.json`, + { + fullMd5: row.fullMd5, + id: row.id, + } ) }) - const hashesOfFilesToDelete = new Map() + await syncWithR2( + s3Client, + pathPrefixBySlug, + hashesOfFilesToToUpsertBySlug, + trx, + dryRun + ) - // list the files in the R2 bucket. 
There may be more files in the - // bucket than can be returned in one list operation so loop until - // all files are listed - let continuationToken: string | undefined = undefined - do { - const listObjectsCommandInput = { - Bucket: GRAPHER_CONFIG_R2_BUCKET, - Prefix: path, - ContinuationToken: continuationToken, - } - const listObjectsCommandOutput: ListObjectsV2CommandOutput = - await s3Client.send( - new ListObjectsV2Command(listObjectsCommandInput) - ) - console.log( - "Got next batch of objects", - listObjectsCommandOutput.Contents + // Ensure that all chart configs exist by id + const slugsAndHashesFromDbByUuid = await knexRaw< + Pick + >(trx, `select fullMd5, id from chart_configs`) + + slugsAndHashesFromDbByUuid.forEach((row) => { + hashesOfFilesToToUpsertByUuid.set( + `${pathPrefixByUuid}/${row.id}.json`, + { + fullMd5: row.fullMd5, + id: row.id, + } ) - if (listObjectsCommandOutput.Contents) { - listObjectsCommandOutput.Contents.forEach((object) => { - if (object.Key && object.ETag) { - hashesOfFilesToDelete.set(object.Key, object.ETag) - } - }) - } - continuationToken = listObjectsCommandOutput.NextContinuationToken - } while (continuationToken) + }) - console.log("10 entries ", take(hashesOfFilesToDelete.entries(), 10)) + await syncWithR2( + s3Client, + pathPrefixByUuid, + hashesOfFilesToToUpsertByUuid, + trx, + dryRun + ) }) } @@ -101,5 +219,5 @@ const parsedArgs = parseArgs(process.argv.slice(2)) if (parsedArgs["h"]) { console.log(`syncGraphersToR2.js - sync graphers to R2`) } else { - main(parsedArgs) + main(parsedArgs, parsedArgs["dry-run"]) } diff --git a/packages/@ourworldindata/types/src/dbTypes/ChartConfigs.ts b/packages/@ourworldindata/types/src/dbTypes/ChartConfigs.ts index cdc791dfc21..b1db3c1e82b 100644 --- a/packages/@ourworldindata/types/src/dbTypes/ChartConfigs.ts +++ b/packages/@ourworldindata/types/src/dbTypes/ChartConfigs.ts @@ -6,7 +6,7 @@ export interface DbInsertChartConfig { id: string patch: JsonString full: JsonString - 
fullSha1Base64?: string + fullMd5?: string slug?: string | null createdAt?: Date updatedAt?: Date | null diff --git a/packages/@ourworldindata/utils/src/Util.ts b/packages/@ourworldindata/utils/src/Util.ts index f98072e531e..f5e927d08d3 100644 --- a/packages/@ourworldindata/utils/src/Util.ts +++ b/packages/@ourworldindata/utils/src/Util.ts @@ -181,7 +181,7 @@ import React from "react" import { match, P } from "ts-pattern" // import "crypto" -let subtleCrypto : any +let subtleCrypto: any if (typeof globalThis.crypto !== "undefined" && globalThis.crypto.subtle) { // Browsers and Cloudflare Workers @@ -472,16 +472,25 @@ export const cagr = ( ) } -/** Compute a SHA1 hash for a given string - */ -export async function getSHA1HashBytes(data: string): Promise { +export enum HashAlgorithm { + Sha1 = "sha1", +} + +export async function getHashBytes( + data: string, + algorithm: HashAlgorithm +): Promise { const encoder = new TextEncoder() const dataBuffer = encoder.encode(data) - const hashBuffer = await subtleCrypto.digest("SHA-1", dataBuffer) + const hashBuffer = await subtleCrypto.digest(algorithm, dataBuffer) return new Uint8Array(hashBuffer) } +export async function getSHA1HashBytes(data: string): Promise { + return getHashBytes(data, HashAlgorithm.Sha1) +} + export type Base64String = Nominal export type HexString = Nominal @@ -518,15 +527,11 @@ export function bytesToHex(bytes: Uint8Array): HexString { .join("") as HexString } -/** Compute a SHA1 hash for a given string and return it as a string HEX encoded - */ export async function getSHA1HashHex(data: string): Promise { const hashBuffer = await getSHA1HashBytes(data) return bytesToHex(hashBuffer) } -/** Compute a SHA1 hash for a given string and return it as a string BASE64 encoded - */ export async function getSHA1HashBase64(data: string): Promise { const bytes = await getSHA1HashBytes(data) const base64 = bytesToBase64(bytes) diff --git a/packages/@ourworldindata/utils/src/index.ts 
b/packages/@ourworldindata/utils/src/index.ts index d0066888c30..4fea8daa233 100644 --- a/packages/@ourworldindata/utils/src/index.ts +++ b/packages/@ourworldindata/utils/src/index.ts @@ -22,9 +22,11 @@ export { mapToObjectLiteral, type Base64String, type HexString, + type HashAlgorithm, getSHA1HashBytes, getSHA1HashHex, getSHA1HashBase64, + getHashBytes, bytesToBase64, base64ToBytes, bytesToHex, From 04304f18c452a2554d8b0a4f98cd5acd907bba8e Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Mon, 5 Aug 2024 15:11:50 +0200 Subject: [PATCH 06/26] =?UTF-8?q?=F0=9F=92=84=20add=20progress=20bar?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- devTools/syncGraphersToR2/syncGraphersToR2.ts | 56 +++++++++++++++---- 1 file changed, 45 insertions(+), 11 deletions(-) diff --git a/devTools/syncGraphersToR2/syncGraphersToR2.ts b/devTools/syncGraphersToR2/syncGraphersToR2.ts index e4abd461c23..e0fe4fe3e71 100644 --- a/devTools/syncGraphersToR2/syncGraphersToR2.ts +++ b/devTools/syncGraphersToR2/syncGraphersToR2.ts @@ -35,6 +35,7 @@ import { } from "@ourworldindata/utils" import { string } from "ts-pattern/dist/patterns.js" import { chunk, take } from "lodash" +import ProgressBar from "progress" type HashAndId = Pick @@ -86,6 +87,13 @@ async function syncWithR2( console.log("Number of files to upsert", hashesOfFilesToToUpsert.size) console.log("Number of files to delete", hashesOfFilesToDelete.size) + let progressBar = new ProgressBar( + "--- Deleting obsolote configs [:bar] :current/:total :elapseds\n", + { + total: hashesOfFilesToDelete.size, + } + ) + for (const [key, _] of hashesOfFilesToDelete.entries()) { const deleteObjectCommandInput: DeleteObjectCommandInput = { Bucket: GRAPHER_CONFIG_R2_BUCKET, @@ -96,8 +104,20 @@ async function syncWithR2( new DeleteObjectCommand(deleteObjectCommandInput) ) else console.log("Would have deleted", key) + progressBar.tick() } + console.log("Finished deletes") + + progressBar = new 
ProgressBar( + "--- Storing missing configs [:bar] :current/:total :elapseds\n", + { + total: hashesOfFilesToToUpsert.size, + } + ) + + const errors = [] + // Chunk the inserts so that we don't need to keep all the full configs in memory for (const batch of chunk([...hashesOfFilesToToUpsert.entries()], 100)) { const fullConfigs = await knexRaw< @@ -116,19 +136,33 @@ async function syncWithR2( console.error(`Full config not found for id ${id}`) return } - const putObjectCommandInput: PutObjectCommandInput = { - Bucket: GRAPHER_CONFIG_R2_BUCKET, - Key: key, - Body: full, - ContentMD5: fullMd5, + try { + const putObjectCommandInput: PutObjectCommandInput = { + Bucket: GRAPHER_CONFIG_R2_BUCKET, + Key: key, + Body: full, + ContentMD5: fullMd5, + } + if (!dryRun) + await s3Client.send( + new PutObjectCommand(putObjectCommandInput) + ) + else console.log("Would have upserted", key) + } catch (err) { + return err } - if (!dryRun) - return s3Client.send( - new PutObjectCommand(putObjectCommandInput) - ) - else console.log("Would have upserted", key) + progressBar.tick() }) - await Promise.all(uploadPromises) + const promiseResults = await Promise.allSettled(uploadPromises) + const batchErrors = promiseResults + .filter((result) => result.status === "rejected") + .map((result) => result.reason) + errors.push(...batchErrors) + } + + console.log("Finished upserts") + if (errors.length > 0) { + console.error("Errors during upserts", errors) } } From e9f965ce84b4639512db4ede532adc7b17f2334d Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Wed, 7 Aug 2024 09:33:32 +0200 Subject: [PATCH 07/26] =?UTF-8?q?=F0=9F=90=9D=20fix=20prettier=20issue?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- devTools/syncGraphersToR2/tsconfig.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devTools/syncGraphersToR2/tsconfig.json b/devTools/syncGraphersToR2/tsconfig.json index 69afea46ff9..74f2eaadbb6 100644 --- 
a/devTools/syncGraphersToR2/tsconfig.json +++ b/devTools/syncGraphersToR2/tsconfig.json @@ -13,6 +13,6 @@ }, { "path": "../../settings" - }, + } ] } From bc54d766b24470accfaf2113044c431412bab613 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Wed, 7 Aug 2024 11:46:50 +0200 Subject: [PATCH 08/26] =?UTF-8?q?=F0=9F=90=9D=20fix=20minor=20issues?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adminSiteServer/chartConfigR2Helpers.ts | 1 - devTools/syncGraphersToR2/syncGraphersToR2.ts | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/adminSiteServer/chartConfigR2Helpers.ts b/adminSiteServer/chartConfigR2Helpers.ts index 219c7b8914b..21ca2c4e162 100644 --- a/adminSiteServer/chartConfigR2Helpers.ts +++ b/adminSiteServer/chartConfigR2Helpers.ts @@ -17,7 +17,6 @@ import { Base64String, excludeUndefined, JsonError, - getMd5HashBase64, } from "@ourworldindata/utils" import { logErrorAndMaybeSendToBugsnag } from "../serverUtils/errorLog.js" import { createHash } from "crypto" diff --git a/devTools/syncGraphersToR2/syncGraphersToR2.ts b/devTools/syncGraphersToR2/syncGraphersToR2.ts index e0fe4fe3e71..9d270e4d75f 100644 --- a/devTools/syncGraphersToR2/syncGraphersToR2.ts +++ b/devTools/syncGraphersToR2/syncGraphersToR2.ts @@ -134,7 +134,7 @@ async function syncWithR2( const full = fullConfigMap.get(id) if (full === undefined) { console.error(`Full config not found for id ${id}`) - return + return null } try { const putObjectCommandInput: PutObjectCommandInput = { @@ -148,10 +148,11 @@ async function syncWithR2( new PutObjectCommand(putObjectCommandInput) ) else console.log("Would have upserted", key) + progressBar.tick() + return null } catch (err) { return err } - progressBar.tick() }) const promiseResults = await Promise.allSettled(uploadPromises) const batchErrors = promiseResults From 7260140356d3c9e629ed90d10299efd4fc16c7a0 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Wed, 7 Aug 2024 22:17:36 
+0200 Subject: [PATCH 09/26] =?UTF-8?q?=F0=9F=94=A8=20remove=20SHA-1=20functions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../@ourworldindata/utils/src/Util.test.ts | 19 -------- packages/@ourworldindata/utils/src/Util.ts | 46 ------------------- packages/@ourworldindata/utils/src/index.ts | 5 -- 3 files changed, 70 deletions(-) diff --git a/packages/@ourworldindata/utils/src/Util.test.ts b/packages/@ourworldindata/utils/src/Util.test.ts index efc593dbfc0..c1cd463fac5 100755 --- a/packages/@ourworldindata/utils/src/Util.test.ts +++ b/packages/@ourworldindata/utils/src/Util.test.ts @@ -29,11 +29,6 @@ import { traverseEnrichedBlock, cartesian, formatInlineList, - getSHA1HashBytes, - bytesToBase64, - base64ToBytes, - bytesToHex, - hexToBytes, } from "./Util.js" import { BlockImageSize, @@ -800,17 +795,3 @@ describe(formatInlineList, () => { ) }) }) - -// a test to see if getSHA1HashBytes encoded to base64 and back to bytes is the same as the original bytes -describe("getSHA1HashBytes going back and forth through base64 and hex yields identical results", () => { - it("hashes a string and decodes it back to the same bytes", async () => { - const bytes = await getSHA1HashBytes("Hello World") - const base64 = bytesToBase64(bytes) - const decodedBytes = base64ToBytes(base64) - expect(decodedBytes).toEqual(bytes) - - const hex = bytesToHex(bytes) - const decodedBytes2 = hexToBytes(hex) - expect(decodedBytes2).toEqual(bytes) - }) -}) diff --git a/packages/@ourworldindata/utils/src/Util.ts b/packages/@ourworldindata/utils/src/Util.ts index f5e927d08d3..58c753efa91 100644 --- a/packages/@ourworldindata/utils/src/Util.ts +++ b/packages/@ourworldindata/utils/src/Util.ts @@ -181,22 +181,6 @@ import React from "react" import { match, P } from "ts-pattern" // import "crypto" -let subtleCrypto: any - -if (typeof globalThis.crypto !== "undefined" && globalThis.crypto.subtle) { - // Browsers and Cloudflare Workers - subtleCrypto = 
globalThis.crypto.subtle -} else { - // Node.js 18+ using the built-in webcrypto module - import("node:crypto") - .then(({ webcrypto }) => { - subtleCrypto = webcrypto.subtle - }) - .catch((err) => { - console.error("Failed to import crypto module:", err) - }) -} - export type NoUndefinedValues = { [P in keyof T]: Required> } @@ -472,25 +456,6 @@ export const cagr = ( ) } -export enum HashAlgorithm { - Sha1 = "sha1", -} - -export async function getHashBytes( - data: string, - algorithm: HashAlgorithm -): Promise { - const encoder = new TextEncoder() - const dataBuffer = encoder.encode(data) - const hashBuffer = await subtleCrypto.digest(algorithm, dataBuffer) - - return new Uint8Array(hashBuffer) -} - -export async function getSHA1HashBytes(data: string): Promise { - return getHashBytes(data, HashAlgorithm.Sha1) -} - export type Base64String = Nominal export type HexString = Nominal @@ -527,17 +492,6 @@ export function bytesToHex(bytes: Uint8Array): HexString { .join("") as HexString } -export async function getSHA1HashHex(data: string): Promise { - const hashBuffer = await getSHA1HashBytes(data) - return bytesToHex(hashBuffer) -} - -export async function getSHA1HashBase64(data: string): Promise { - const bytes = await getSHA1HashBytes(data) - const base64 = bytesToBase64(bytes) - return base64 -} - export const makeAnnotationsSlug = (columnSlug: string): string => `${columnSlug}-annotations` diff --git a/packages/@ourworldindata/utils/src/index.ts b/packages/@ourworldindata/utils/src/index.ts index 4fea8daa233..c94ef244060 100644 --- a/packages/@ourworldindata/utils/src/index.ts +++ b/packages/@ourworldindata/utils/src/index.ts @@ -22,11 +22,6 @@ export { mapToObjectLiteral, type Base64String, type HexString, - type HashAlgorithm, - getSHA1HashBytes, - getSHA1HashHex, - getSHA1HashBase64, - getHashBytes, bytesToBase64, base64ToBytes, bytesToHex, From 93a34ff48ccefde6c00493703682bdd8315223dc Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Thu, 8 Aug 2024 
11:39:31 +0200 Subject: [PATCH 10/26] =?UTF-8?q?=F0=9F=94=A8=20add=20tests=20for=20base64?= =?UTF-8?q?/hex=20conversion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../@ourworldindata/utils/src/Util.test.ts | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/packages/@ourworldindata/utils/src/Util.test.ts b/packages/@ourworldindata/utils/src/Util.test.ts index c1cd463fac5..a10e4c9efd0 100755 --- a/packages/@ourworldindata/utils/src/Util.test.ts +++ b/packages/@ourworldindata/utils/src/Util.test.ts @@ -29,12 +29,17 @@ import { traverseEnrichedBlock, cartesian, formatInlineList, + base64ToBytes, + bytesToBase64, + hexToBytes, + bytesToHex, } from "./Util.js" import { BlockImageSize, OwidEnrichedGdocBlock, SortOrder, } from "@ourworldindata/types" +import { webcrypto as crypto } from "node:crypto" describe(findClosestTime, () => { describe("without tolerance", () => { @@ -795,3 +800,24 @@ describe(formatInlineList, () => { ) }) }) + +function generateRandomBytes(length: number): Uint8Array { + const bytes = new Uint8Array(length) + crypto.getRandomValues(bytes) + return bytes +} + +describe("hex/base64 conversion is reversible", () => { + const originalBytes = generateRandomBytes(33) + const base64String = bytesToBase64(originalBytes) + const roundTrippedBytes = base64ToBytes(base64String) + it("is the same after converting to base64 and back", () => { + expect(originalBytes).toEqual(roundTrippedBytes) + }) + + const hexString = bytesToHex(originalBytes) + const roundTrippedBytesHex = hexToBytes(hexString) + it("is the same after converting to hex and back", () => { + expect(originalBytes).toEqual(roundTrippedBytesHex) + }) +}) From 8c1b00e44665b7bca209b2c9d7d0bb7517312934 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Thu, 8 Aug 2024 13:06:08 +0200 Subject: [PATCH 11/26] =?UTF-8?q?=F0=9F=92=84=20minor=20improvements?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- adminSiteServer/chartConfigR2Helpers.ts | 22 ++--- devTools/syncGraphersToR2/syncGraphersToR2.ts | 83 ++++++++++++------- 2 files changed, 64 insertions(+), 41 deletions(-) diff --git a/adminSiteServer/chartConfigR2Helpers.ts b/adminSiteServer/chartConfigR2Helpers.ts index 21ca2c4e162..5ec7d389a47 100644 --- a/adminSiteServer/chartConfigR2Helpers.ts +++ b/adminSiteServer/chartConfigR2Helpers.ts @@ -90,17 +90,15 @@ export async function saveGrapherConfigToR2( } const bucket = GRAPHER_CONFIG_R2_BUCKET - // On prod, GRAPHER_CONFIG_R2_BUCKET_PATH might be an empty string and in this case we need to exclude it - const path = excludeUndefined([ - GRAPHER_CONFIG_R2_BUCKET_PATH, - directory, - ]).join("/") + const path = [GRAPHER_CONFIG_R2_BUCKET_PATH, directory, filename].join( + "/" + ) const MIMEType = "application/json" const params: PutObjectCommandInput = { Bucket: bucket, - Key: `${path}/${filename}`, + Key: path, Body: config_stringified, ContentType: MIMEType, ContentMD5: configMd5, @@ -149,15 +147,13 @@ export async function deleteGrapherConfigFromR2( } const bucket = GRAPHER_CONFIG_R2_BUCKET - // On prod, GRAPHER_CONFIG_R2_BUCKET_PATH might be an empty string and in this case we need to exclude it - const path = excludeUndefined([ - GRAPHER_CONFIG_R2_BUCKET_PATH, - directory, - ]).join("/") + const path = [GRAPHER_CONFIG_R2_BUCKET_PATH, directory, filename].join( + "/" + ) const params: DeleteObjectCommandInput = { Bucket: bucket, - Key: `${path}/${filename}`, + Key: path, } await s3Client.send(new DeleteObjectCommand(params)) @@ -167,7 +163,7 @@ export async function deleteGrapherConfigFromR2( } catch (err) { await logErrorAndMaybeSendToBugsnag(err) throw new JsonError( - `Failed to delete the grapher config to R2 at ${directory}/${filename}. Inner error: ${err}` + `Failed to delete the grapher config to R2 at ${path}. 
Inner error: ${err}` ) } } diff --git a/devTools/syncGraphersToR2/syncGraphersToR2.ts b/devTools/syncGraphersToR2/syncGraphersToR2.ts index 9d270e4d75f..2838a658ad1 100644 --- a/devTools/syncGraphersToR2/syncGraphersToR2.ts +++ b/devTools/syncGraphersToR2/syncGraphersToR2.ts @@ -39,6 +39,15 @@ import ProgressBar from "progress" type HashAndId = Pick +/** Sync a set of chart configs with R2. Pass in a map of the keys to their md5 hashes and UUIDs + and this function will upsert all missing/outdated ones and delete any that are no longer needed. + + @param s3Client The S3 client to use + @param pathPrefix The path prefix to use for the files (e.g. "config/by-uuid" then everything inside it will be synced) + @param hashesOfFilesToToUpsert A map of the keys to their md5 hashes and UUIDs + @param trx The transaction to use for querying the DB for full configs + @param dryRun Whether to actually make changes to R2 or just log what would + */ async function syncWithR2( s3Client: S3Client, pathPrefix: string, @@ -46,6 +55,12 @@ async function syncWithR2( trx: KnexReadonlyTransaction, dryRun: boolean = false ) { + // We'll first get all the files in the R2 bucket under the path prefix + // and check if the hash of each file that exist in R2 matches the hash + // of the file we want to upsert. If it does, we'll remove it from the + // list of files to upsert. If it doesn't, we'll add it to the list of + // files to delete. + const hashesOfFilesToDelete = new Map() // list the files in the R2 bucket. 
There may be more files in the @@ -69,13 +84,19 @@ async function syncWithR2( const md5 = object.ETag.replace(/"/g, "") as HexString const md5Base64 = bytesToBase64(hexToBytes(md5)) - if ( - hashesOfFilesToToUpsert.has(object.Key) && - hashesOfFilesToToUpsert.get(object.Key)?.fullMd5 === + if (hashesOfFilesToToUpsert.has(object.Key)) { + if ( + hashesOfFilesToToUpsert.get(object.Key)?.fullMd5 === md5Base64 - ) { - hashesOfFilesToToUpsert.delete(object.Key) + ) { + hashesOfFilesToToUpsert.delete(object.Key) + } + // If the existing full config in R2 is different then + // we just keep the hashesOfFilesToToUpsert entry around + // which will upsert the new full config later on } else { + // if the file in R2 is not in the list of files to upsert + // then we should delete it hashesOfFilesToDelete.set(object.Key, md5Base64) } } @@ -88,12 +109,13 @@ async function syncWithR2( console.log("Number of files to delete", hashesOfFilesToDelete.size) let progressBar = new ProgressBar( - "--- Deleting obsolote configs [:bar] :current/:total :elapseds\n", + "--- Deleting obsolete configs [:bar] :current/:total :elapseds\n", { total: hashesOfFilesToDelete.size, } ) + // We could parallelize the deletes but it's not worth the complexity for most cases IMHO for (const [key, _] of hashesOfFilesToDelete.entries()) { const deleteObjectCommandInput: DeleteObjectCommandInput = { Bucket: GRAPHER_CONFIG_R2_BUCKET, @@ -120,6 +142,7 @@ async function syncWithR2( // Chunk the inserts so that we don't need to keep all the full configs in memory for (const batch of chunk([...hashesOfFilesToToUpsert.entries()], 100)) { + // Get the full configs for the batch const fullConfigs = await knexRaw< Pick >(trx, `select id, full from chart_configs where id in (?)`, [ @@ -128,31 +151,28 @@ async function syncWithR2( const fullConfigMap = new Map( fullConfigs.map(({ id, full }) => [id, full]) ) + + // Upload the full configs to R2 in parallel const uploadPromises = batch.map(async ([key, val]) => { 
const id = val.id const fullMd5 = val.fullMd5 const full = fullConfigMap.get(id) if (full === undefined) { - console.error(`Full config not found for id ${id}`) - return null + return Promise.reject( + new Error(`Full config not found for id ${id}`) + ) } - try { - const putObjectCommandInput: PutObjectCommandInput = { - Bucket: GRAPHER_CONFIG_R2_BUCKET, - Key: key, - Body: full, - ContentMD5: fullMd5, - } - if (!dryRun) - await s3Client.send( - new PutObjectCommand(putObjectCommandInput) - ) - else console.log("Would have upserted", key) - progressBar.tick() - return null - } catch (err) { - return err + const putObjectCommandInput: PutObjectCommandInput = { + Bucket: GRAPHER_CONFIG_R2_BUCKET, + Key: key, + Body: full, + ContentMD5: fullMd5, } + if (!dryRun) + await s3Client.send(new PutObjectCommand(putObjectCommandInput)) + else console.log("Would have upserted", key) + progressBar.tick() + return }) const promiseResults = await Promise.allSettled(uploadPromises) const batchErrors = promiseResults @@ -163,7 +183,9 @@ async function syncWithR2( console.log("Finished upserts") if (errors.length > 0) { - console.error("Errors during upserts", errors) + console.error(`${errors.length} Errors during upserts`) + for (const error of errors) { + console.error(error) } } @@ -199,7 +221,7 @@ async function main(parsedArgs: parseArgs.ParsedArgs, dryRun: boolean) { ]).join("/") await knexReadonlyTransaction(async (trx) => { - // Ensure that the published charts exist by slug + // Sync charts published by slug const slugsAndHashesFromDb = await knexRaw< Pick >( @@ -225,7 +247,8 @@ async function main(parsedArgs: parseArgs.ParsedArgs, dryRun: boolean) { dryRun ) - // Ensure that all chart configs exist by id + + // Sync charts by UUID const slugsAndHashesFromDbByUuid = await knexRaw< Pick >(trx, `select fullMd5, id from chart_configs`) @@ -252,7 +275,11 @@ async function main(parsedArgs: parseArgs.ParsedArgs, dryRun: boolean) { const parsedArgs = 
parseArgs(process.argv.slice(2)) if (parsedArgs["h"]) { - console.log(`syncGraphersToR2.js - sync graphers to R2`) + console.log( + `syncGraphersToR2.js - sync grapher configs from the chart_configs table to R2 + +--dry-run: Don't make any actual changes to R2` + ) } else { main(parsedArgs, parsedArgs["dry-run"]) } From 2fa18100a56f7eff5b1a2f6bd4e591fc64b5fe51 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Thu, 8 Aug 2024 13:41:27 +0200 Subject: [PATCH 12/26] =?UTF-8?q?=F0=9F=96=8A=EF=B8=8F=20document=20env=20?= =?UTF-8?q?vars?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .env.example-full | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.env.example-full b/.env.example-full index 24ad974562f..d0cb5063adf 100644 --- a/.env.example-full +++ b/.env.example-full @@ -27,6 +27,12 @@ IMAGE_HOSTING_R2_CDN_URL= IMAGE_HOSTING_R2_BUCKET_PATH= R2_ACCESS_KEY_ID= # optional R2_SECRET_ACCESS_KEY= # optional +# These two GRAPHER_CONFIG_ settings are used to store grapher configs in an R2 bucket. +# The cloudflare workers for thumbnail rendering etc use these settings to fetch the grapher configs. +# This means that for most local dev it is not necessary to set these. 
+GRAPHER_CONFIG_R2_BUCKET= # optional - for local dev set it to "owid-grapher-configs-staging" +GRAPHER_CONFIG_R2_BUCKET_PATH= # optional - for local dev set it to "devs/YOURNAME" + OPENAI_API_KEY= From bf1cf37b35030b13bcdceb1d3a2f1b12663126d7 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Thu, 8 Aug 2024 16:21:47 +0200 Subject: [PATCH 13/26] =?UTF-8?q?=F0=9F=94=A8=20fix=20compile=20errors?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adminSiteServer/chartConfigR2Helpers.ts | 8 ++------ devTools/syncGraphersToR2/syncGraphersToR2.ts | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/adminSiteServer/chartConfigR2Helpers.ts b/adminSiteServer/chartConfigR2Helpers.ts index 5ec7d389a47..11cff24b08f 100644 --- a/adminSiteServer/chartConfigR2Helpers.ts +++ b/adminSiteServer/chartConfigR2Helpers.ts @@ -13,11 +13,7 @@ import { PutObjectCommandInput, S3Client, } from "@aws-sdk/client-s3" -import { - Base64String, - excludeUndefined, - JsonError, -} from "@ourworldindata/utils" +import { Base64String, JsonError } from "@ourworldindata/utils" import { logErrorAndMaybeSendToBugsnag } from "../serverUtils/errorLog.js" import { createHash } from "crypto" @@ -163,7 +159,7 @@ export async function deleteGrapherConfigFromR2( } catch (err) { await logErrorAndMaybeSendToBugsnag(err) throw new JsonError( - `Failed to delete the grapher config to R2 at ${path}. Inner error: ${err}` + `Failed to delete the grapher config to R2 at ${directory}/${filename}. 
Inner error: ${err}` ) } } diff --git a/devTools/syncGraphersToR2/syncGraphersToR2.ts b/devTools/syncGraphersToR2/syncGraphersToR2.ts index 2838a658ad1..54805b67a1b 100644 --- a/devTools/syncGraphersToR2/syncGraphersToR2.ts +++ b/devTools/syncGraphersToR2/syncGraphersToR2.ts @@ -186,6 +186,7 @@ async function syncWithR2( console.error(`${errors.length} Errors during upserts`) for (const error of errors) { console.error(error) + } } } @@ -247,7 +248,6 @@ async function main(parsedArgs: parseArgs.ParsedArgs, dryRun: boolean) { dryRun ) - // Sync charts by UUID const slugsAndHashesFromDbByUuid = await knexRaw< Pick From 2125a46893b49b9502899cae049e492e132dbc1d Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Tue, 13 Aug 2024 15:46:59 +0200 Subject: [PATCH 14/26] =?UTF-8?q?=F0=9F=90=9B=20fix=20sync=20issue=20not?= =?UTF-8?q?=20taking=20published=20status=20into=20account?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- devTools/syncGraphersToR2/syncGraphersToR2.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/devTools/syncGraphersToR2/syncGraphersToR2.ts b/devTools/syncGraphersToR2/syncGraphersToR2.ts index 54805b67a1b..92a92d1c459 100644 --- a/devTools/syncGraphersToR2/syncGraphersToR2.ts +++ b/devTools/syncGraphersToR2/syncGraphersToR2.ts @@ -227,7 +227,10 @@ async function main(parsedArgs: parseArgs.ParsedArgs, dryRun: boolean) { Pick >( trx, - `select slug, fullMd5, id from chart_configs where slug is not null` + `select slug, fullMd5, id + from chart_configs + where slug is not null + and full ->> '$.isPublished' = "true"` ) slugsAndHashesFromDb.forEach((row) => { From 1821c665c0b94dd076b155fbfff38289082ee239 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Tue, 13 Aug 2024 18:28:09 +0200 Subject: [PATCH 15/26] =?UTF-8?q?=F0=9F=94=A8=20make=20deletes=20parallel?= =?UTF-8?q?=20as=20well=20since=20during=20testing=20that=20can=20easily?= =?UTF-8?q?=20happen?= MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- devTools/syncGraphersToR2/syncGraphersToR2.ts | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/devTools/syncGraphersToR2/syncGraphersToR2.ts b/devTools/syncGraphersToR2/syncGraphersToR2.ts index 92a92d1c459..b7a546b5f39 100644 --- a/devTools/syncGraphersToR2/syncGraphersToR2.ts +++ b/devTools/syncGraphersToR2/syncGraphersToR2.ts @@ -115,18 +115,21 @@ async function syncWithR2( } ) - // We could parallelize the deletes but it's not worth the complexity for most cases IMHO - for (const [key, _] of hashesOfFilesToDelete.entries()) { - const deleteObjectCommandInput: DeleteObjectCommandInput = { - Bucket: GRAPHER_CONFIG_R2_BUCKET, - Key: key, - } - if (!dryRun) - await s3Client.send( - new DeleteObjectCommand(deleteObjectCommandInput) - ) - else console.log("Would have deleted", key) - progressBar.tick() + // Delete the files in R2 that are no longer needed + for (const batch of chunk([...hashesOfFilesToDelete.entries()], 100)) { + const deletePromises = batch.map(async ([key, _]) => { + const deleteObjectCommandInput: DeleteObjectCommandInput = { + Bucket: GRAPHER_CONFIG_R2_BUCKET, + Key: key, + } + if (!dryRun) + await s3Client.send( + new DeleteObjectCommand(deleteObjectCommandInput) + ) + else console.log("Would have deleted", key) + progressBar.tick() + }) + await Promise.allSettled(deletePromises) } console.log("Finished deletes") From f683d475abd59e0d0cc72ad32303a502d94ea845 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Tue, 13 Aug 2024 22:20:38 +0200 Subject: [PATCH 16/26] =?UTF-8?q?=F0=9F=90=9B=20fix=20chart=20saving=20bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adminSiteServer/apiRouter.ts | 2 +- adminSiteServer/chartConfigR2Helpers.ts | 8 ++++++++ ...4725a087476a0964c6bb55f73546000001914c7f6943 | 1 + ...14840ceecbac0b694f727b4fef54000001914c7f30cf | 1 + 
...fb1baf92d034efbcaaf4336379640ed744ded.sqlite | Bin 0 -> 20480 bytes ...16ac93bc6052b310dbe98d31b29964876a4e5.sqlite | Bin 0 -> 32768 bytes ...bd82f089c2ada26f16edfab7682f000001914c3c5a4e | 1 + 7 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 cfstorage/v3/cache/default/blobs/e43e315d93bd1d24f23d5fbc7e0f444eaaa54725a087476a0964c6bb55f73546000001914c7f6943 create mode 100644 cfstorage/v3/cache/default/blobs/fb9c6030ec8e33eddf76f55554689c77e5e114840ceecbac0b694f727b4fef54000001914c7f30cf create mode 100644 cfstorage/v3/cache/miniflare-CacheObject/9f458c07675338a7426a7b81ac4fb1baf92d034efbcaaf4336379640ed744ded.sqlite create mode 100644 cfstorage/v3/r2/miniflare-R2BucketObject/9cb95b050454e07fe2bdb131e4816ac93bc6052b310dbe98d31b29964876a4e5.sqlite create mode 100644 cfstorage/v3/r2/owid-grapher-configs-staging/blobs/c15fe828b77d5346517b9cefd79fcb5d2c7cbd82f089c2ada26f16edfab7682f000001914c3c5a4e diff --git a/adminSiteServer/apiRouter.ts b/adminSiteServer/apiRouter.ts index 86f6215a9e5..8b7ae33f20f 100644 --- a/adminSiteServer/apiRouter.ts +++ b/adminSiteServer/apiRouter.ts @@ -519,9 +519,9 @@ const saveGrapher = async ( config: newConfig, user, }) - chartId = newConfig.id! patchConfig = configs.patchConfig fullConfig = configs.fullConfig + chartId = fullConfig.id! 
} newConfig = patchConfig diff --git a/adminSiteServer/chartConfigR2Helpers.ts b/adminSiteServer/chartConfigR2Helpers.ts index 11cff24b08f..781fbd233fb 100644 --- a/adminSiteServer/chartConfigR2Helpers.ts +++ b/adminSiteServer/chartConfigR2Helpers.ts @@ -59,6 +59,10 @@ export async function saveGrapherConfigToR2( filename: string, configMd5: Base64String ) { + if (process.env.NODE_ENV === "test") { + console.log("Skipping saving grapher config to R2 in test environment") + return + } if ( GRAPHER_CONFIG_R2_BUCKET === undefined || GRAPHER_CONFIG_R2_BUCKET_PATH === undefined @@ -116,6 +120,10 @@ export async function deleteGrapherConfigFromR2( directory: R2GrapherConfigDirectory, filename: string ) { + if (process.env.NODE_ENV === "test") { + console.log("Skipping saving grapher config to R2 in test environment") + return + } if ( GRAPHER_CONFIG_R2_BUCKET === undefined || GRAPHER_CONFIG_R2_BUCKET_PATH === undefined diff --git a/cfstorage/v3/cache/default/blobs/e43e315d93bd1d24f23d5fbc7e0f444eaaa54725a087476a0964c6bb55f73546000001914c7f6943 b/cfstorage/v3/cache/default/blobs/e43e315d93bd1d24f23d5fbc7e0f444eaaa54725a087476a0964c6bb55f73546000001914c7f6943 new file mode 100644 index 00000000000..8b61c3007a2 --- /dev/null +++ b/cfstorage/v3/cache/default/blobs/e43e315d93bd1d24f23d5fbc7e0f444eaaa54725a087476a0964c6bb55f73546000001914c7f6943 @@ -0,0 +1 @@ +,No table loaded yet.No table loaded yet.Data source:CC BY \ No newline at end of file diff --git a/cfstorage/v3/cache/default/blobs/fb9c6030ec8e33eddf76f55554689c77e5e114840ceecbac0b694f727b4fef54000001914c7f30cf b/cfstorage/v3/cache/default/blobs/fb9c6030ec8e33eddf76f55554689c77e5e114840ceecbac0b694f727b4fef54000001914c7f30cf new file mode 100644 index 00000000000..ec6f9abe6cb --- /dev/null +++ b/cfstorage/v3/cache/default/blobs/fb9c6030ec8e33eddf76f55554689c77e5e114840ceecbac0b694f727b4fef54000001914c7f30cf @@ -0,0 +1 @@ +Not found \ No newline at end of file diff --git 
a/cfstorage/v3/cache/miniflare-CacheObject/9f458c07675338a7426a7b81ac4fb1baf92d034efbcaaf4336379640ed744ded.sqlite b/cfstorage/v3/cache/miniflare-CacheObject/9f458c07675338a7426a7b81ac4fb1baf92d034efbcaaf4336379640ed744ded.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..2cab1b5d9ff24fd4d0459932407b96f50cb38aa0 GIT binary patch literal 20480 zcmeI&-*4MQ8~||JY3N$1_4Y!wCr&O8tB8pmpY1qk5-H0@v@99HT{lftbUvRG3nvco zWlE`rl<~kL;)Q4a3ityMf6d-`=Il^46ZC;fMPmD&}2(t)|~X?e4wSQ*`+d)a$PI-np3YVEMkXxwkKt%{U1Xwm!#21f{>$@tcR8*7{87 z&Nf0{u~X!?p8DufG9kfebKc-7bK`KWu;>Es<3%ZteU)`M0*ZyhsE zLc&RL|Mr1hdT+12FH40+_H`m2t_Nh!)0NJ8oaS03f2_Wd|M#i9xp(`vy!2MvqS4^t zlR=RyZ*Hb&EB1jzx@<3b-wTq=89MCr4n?#G9iZd3|9S7oNB!P$``&iG>dicZ1Ogxc z0w4eaAOHd&00JNY0w4eaAn>*d?1+siJ0D8BKZy(y2!H?xfB*=900@8p2!H?xyyF6| zQsu@{UFzlU^bzONMzxxxlq91xM|q37&AhMjoq3}jJk_mhZvrNsS^aY9l4f` z9V1VsXJYC`j)yJz$IdUuzny6#MnX2rw8r!28YOhZbef8D_%zWf+H@WyF|D9XACrX$ zztMyAs+>J3qN@W|IrPAZx>yytTfb4`S!W$`ymYZObq zd^MCe&vJ0g zuO;;l_4PGu5T*nH5C8!X009sH0T2KI5C8!X009vAZw0nXWqI|vh_Y2GZ>>IQ_!IU0 Bt}Xxo literal 0 HcmV?d00001 diff --git a/cfstorage/v3/r2/miniflare-R2BucketObject/9cb95b050454e07fe2bdb131e4816ac93bc6052b310dbe98d31b29964876a4e5.sqlite b/cfstorage/v3/r2/miniflare-R2BucketObject/9cb95b050454e07fe2bdb131e4816ac93bc6052b310dbe98d31b29964876a4e5.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..81cf698ad818946ab3465a07f52ca7b82350a935 GIT binary patch literal 32768 zcmeI(-)`Dg90zclCeWdzX)h*}i%CbxO^`~#7-N{GX{7NstAw=FSx2SGI<}7!T7wxj zS+gh?G@Eug?G5%SyX_P7RdzQU8U)HVIsbgl=XbDV>^yza?D~w@?yyIF zB64@RREqnG5RT(g!FD^?rfDhI3AbtDr#o~0rnm>gKUdd&<1*Q|+}iJVK3#jf@@Dl- z_U+2qoK`4;00bZa0SG_<0uX?}dkUOwr87BE7p|6K;UP4yp zVOwcv3eg(dO@$;>AX`}yq|v}}sik)v0Rg&X=ZhFytXKvFK5oI6x{y^^qF|nvmli1CX=et2r=2Y=~8eFDnYO52h zlaRTw-_%HHwpy}zbvd2;L4G&JDaR$qI6M8imdfNdH~C-W8D$*$5HQ}ob%iZ1h_PlE zSD4PP=}72^4ZW`0PqO)dRUS=)S=W3yre6Xt>2UuoR@uCCE1gp}=S5*EIkAsFgqPKm zOKq3W%X_=gAe-ApijgKmY;|fB*y_009U<00I#BUkR)Rqvh}X%>^$c2tWV=5P$## 
zAOHafKmY;|fWXHm@Op_~8S~@9ZfbY=-jjP4JMxMa?RS||JQ&i!AsZHr=~l2J3HTE&p5C|2tw zgE6}DnQTTQa^aoq8N^R zhyQvJ-v7t*|BD)2j0*w~fB*y_009U<00Izz00ba#GXnVk|C@0{FA#tL1Rwwb2tWV= z5P$##AOL|y7Qp@gMUD=}1_1~_00Izz00bZa0SG_<0uZkY* literal 0 HcmV?d00001 diff --git a/cfstorage/v3/r2/owid-grapher-configs-staging/blobs/c15fe828b77d5346517b9cefd79fcb5d2c7cbd82f089c2ada26f16edfab7682f000001914c3c5a4e b/cfstorage/v3/r2/owid-grapher-configs-staging/blobs/c15fe828b77d5346517b9cefd79fcb5d2c7cbd82f089c2ada26f16edfab7682f000001914c3c5a4e new file mode 100644 index 00000000000..1d2983252b3 --- /dev/null +++ b/cfstorage/v3/r2/owid-grapher-configs-staging/blobs/c15fe828b77d5346517b9cefd79fcb5d2c7cbd82f089c2ada26f16edfab7682f000001914c3c5a4e @@ -0,0 +1 @@ +"{\"id\": 126, \"map\": {\"time\": 2024, \"colorScale\": {\"equalSizeBins\": true, \"baseColorScheme\": \"YlGnBu\", \"binningStrategy\": \"manual\", \"colorSchemeInvert\": false, \"legendDescription\": \"Logo Life expectancy at age 10\", \"customNumericColors\": [\"#ecf8e6\", null, \"#e0f3db\", \"#d6efd0\", \"#ccebc5\", \"#a8ddb5\", \"#7bccc4\", \"#4eb3d3\", \"#2b8cbe\", \"#0868ac\"], \"customNumericValues\": [55, 60, 65, 70, 75, 80, 85, 90, 95, -1], \"customCategoryColors\": {\"NA\": \"#818282\", \"No data\": \"#8b8b8b\"}, \"customNumericMinValue\": 54, \"customHiddenCategories\": {\"NA\": true, \"No data\": false}, \"binningStrategyBinCount\": 5, \"customNumericColorsActive\": false}, \"columnSlug\": \"539027\", \"projection\": \"World\", \"hideTimeline\": false, \"timeTolerance\": 10, \"toleranceStrategy\": \"closest\", \"tooltipUseCustomLabels\": false}, \"tab\": \"map\", \"logo\": \"owid\", \"slug\": \"life-expectancy-at-age-15\", \"type\": \"LineChart\", \"title\": \"Life expectancy at age 15\", \"xAxis\": {\"scaleType\": \"linear\", \"facetDomain\": \"shared\", \"canChangeScaleType\": false, \"removePointsOutsideDomain\": false}, \"yAxis\": {\"scaleType\": \"linear\", \"facetDomain\": \"shared\", \"canChangeScaleType\": false, 
\"removePointsOutsideDomain\": false}, \"sortBy\": \"total\", \"$schema\": \"https://files.ourworldindata.org/schemas/grapher-schema.004.json\", \"maxTime\": \"latest\", \"minTime\": \"earliest\", \"version\": 18, \"hideLogo\": false, \"subtitle\": \"The [period life expectancy](#dod:period-life-expectancy) for people who have reached the age of 15, in a given year. From 2022 onwards, the UN WPP's mid-variant projections are shown.\", \"hasMapTab\": true, \"originUrl\": \"https://ourworldindata.org/life-expectancy/\", \"sortOrder\": \"desc\", \"stackMode\": \"absolute\", \"colorScale\": {\"equalSizeBins\": true, \"binningStrategy\": \"ckmeans\", \"colorSchemeInvert\": false, \"binningStrategyBinCount\": 5, \"customNumericColorsActive\": false}, \"dimensions\": [{\"display\": {\"name\": \"e10\", \"unit\": \"years\", \"tolerance\": 5, \"includeInTable\": true}, \"property\": \"y\", \"variableId\": 815408}], \"entityType\": \"country or region\", \"hideLegend\": false, \"hasChartTab\": true, \"isPublished\": true, \"hideTimeline\": false, \"addCountryMode\": \"add-country\", \"showNoDataArea\": true, \"showYearLabels\": false, \"zoomToSelection\": false, \"entityTypePlural\": \"countries\", \"hideFacetControl\": true, \"hideScatterLabels\": false, \"invertColorScheme\": false, \"hideRelativeToggle\": true, \"hideTotalValueLabel\": false, \"missingDataStrategy\": \"auto\", \"selectedEntityNames\": [\"China\", \"Brazil\", \"Sweden\", \"Italy\", \"United Kingdom\"], \"compareEndPointsOnly\": false, \"matchingEntitiesOnly\": false, \"selectedFacetStrategy\": \"none\", \"hideConnectedScatterLines\": false, \"hideLinesOutsideTolerance\": false, \"scatterPointLabelStrategy\": \"year\", \"facettingLabelByYVariables\": \"metric\", \"hideAnnotationFieldsInTitle\": {\"time\": false, \"entity\": false, \"changeInPrefix\": false}}" \ No newline at end of file From 811d4101db0916e0ac9b0ced1b8a5c1ff7ac3e8f Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Mon, 5 Aug 2024 19:19:23 
+0200 Subject: [PATCH 17/26] =?UTF-8?q?=F0=9F=94=A8=20configure=20r2=20bindings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- wrangler.toml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/wrangler.toml b/wrangler.toml index 4d88b657784..fc2d7e14219 100644 --- a/wrangler.toml +++ b/wrangler.toml @@ -17,7 +17,16 @@ ENV = "preview" [env.production] compatibility_date = "2024-04-29" +[[env.production.r2_buckets]] +binding = "r2ChartConfigs" +bucket_name = "owid-grapher-configs" + [env.production.vars] ENV = "production" MAILGUN_DOMAIN = "mg.ourworldindata.org" SLACK_ERROR_CHANNEL_ID = "C5JJW19PS" + + +[[env.development.r2_buckets]] +binding = "r2ChartConfigs" +bucket_name = "owid-grapher-configs-staging" From 01900cef4b3dfa3082b8ac42494fb891cd813a0c Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Tue, 6 Aug 2024 12:35:17 +0200 Subject: [PATCH 18/26] =?UTF-8?q?=F0=9F=9A=A7=20start=20work=20on=20fetchi?= =?UTF-8?q?ng=20grapher=20configs=20for=20pages=20functions=20from=20R2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- adminSiteServer/chartConfigR2Helpers.ts | 5 +--- functions/_common/grapherRenderer.ts | 27 ++++++++++++++----- functions/grapher/thumbnail/[slug].ts | 12 +++++++++ .../types/src/domainTypes/Various.ts | 5 ++++ packages/@ourworldindata/types/src/index.ts | 1 + wrangler.toml | 9 ++++++- 6 files changed, 47 insertions(+), 12 deletions(-) diff --git a/adminSiteServer/chartConfigR2Helpers.ts b/adminSiteServer/chartConfigR2Helpers.ts index 781fbd233fb..eb670bcb46e 100644 --- a/adminSiteServer/chartConfigR2Helpers.ts +++ b/adminSiteServer/chartConfigR2Helpers.ts @@ -14,6 +14,7 @@ import { S3Client, } from "@aws-sdk/client-s3" import { Base64String, JsonError } from "@ourworldindata/utils" +import { R2GrapherConfigDirectory } from "@ourworldindata/types" import { logErrorAndMaybeSendToBugsnag } from "../serverUtils/errorLog.js" import { createHash } 
from "crypto" @@ -25,10 +26,6 @@ export function getMd5HashBase64(data: string): Base64String { .update(data, "utf-8") .digest("base64") as Base64String } -export enum R2GrapherConfigDirectory { - byUUID = "config/by-uuid", - publishedGrapherBySlug = "grapher/by-slug", -} let s3Client: S3Client | undefined = undefined diff --git a/functions/_common/grapherRenderer.ts b/functions/_common/grapherRenderer.ts index 249488b75bf..7fdf9ed6065 100644 --- a/functions/_common/grapherRenderer.ts +++ b/functions/_common/grapherRenderer.ts @@ -1,5 +1,9 @@ import { Grapher, GrapherInterface } from "@ourworldindata/grapher" -import { Bounds, deserializeJSONFromHTML } from "@ourworldindata/utils" +import { + Bounds, + deserializeJSONFromHTML, + excludeUndefined, +} from "@ourworldindata/utils" import { svg2png, initialize as initializeSvg2Png } from "svg2png-wasm" import { TimeLogger } from "./timeLogger" import { png } from "itty-router" @@ -12,6 +16,7 @@ import LatoMedium from "../_common/fonts/LatoLatin-Medium.ttf.bin" import LatoBold from "../_common/fonts/LatoLatin-Bold.ttf.bin" import PlayfairSemiBold from "../_common/fonts/PlayfairDisplayLatin-SemiBold.ttf.bin" import { Env } from "../grapher/thumbnail/[slug].js" +import { R2GrapherConfigDirectory } from "@ourworldindata/types" declare global { // eslint-disable-next-line no-var @@ -143,13 +148,21 @@ async function fetchAndRenderGrapherToSvg({ }) { const grapherLogger = new TimeLogger("grapher") + const url = new URL(`/grapher/${slug}`, env.url) + const slugOnly = url.pathname.split("/").pop() + console.log("Fetching", url.href) + const key = excludeUndefined([ + env.GRAPHER_CONFIG_R2_BUCKET_PATH, + R2GrapherConfigDirectory.bySlug, + `${slugOnly}.json`, + ]).join("/") + + console.log("Fetching", key) + + console.log("r2", env.r2ChartConfigs) + // Fetch grapher config and extract it from the HTML - const grapherConfig: GrapherInterface = await env.ASSETS.fetch( - new URL(`/grapher/${slug}`, env.url) - ) - .then((r) => (r.ok 
? r : Promise.reject("Failed to load grapher page"))) - .then((r) => r.text()) - .then((html) => deserializeJSONFromHTML(html)) + const grapherConfig: GrapherInterface = await env.r2ChartConfigs.get(key) if (!grapherConfig) { throw new Error("Could not find grapher config") diff --git a/functions/grapher/thumbnail/[slug].ts b/functions/grapher/thumbnail/[slug].ts index b5efae2ac13..6add785b1b2 100644 --- a/functions/grapher/thumbnail/[slug].ts +++ b/functions/grapher/thumbnail/[slug].ts @@ -5,7 +5,11 @@ export interface Env { ASSETS: { fetch: typeof fetch } + r2ChartConfigs: { + get: (url: string) => Promise + } url: URL + GRAPHER_CONFIG_R2_BUCKET_PATH: string ENV: string } @@ -30,6 +34,14 @@ router export const onRequestGet: PagesFunction = async (ctx) => { const { request, env } = ctx + const test = await (ctx.env as any).r2ChartConfigs.get( + "devs/daniel/grapher/by-slug/life-expectancy.json" + ) + const listed = await (ctx.env as any).r2ChartConfigs.list({ limit: 10 }) + console.log("listed", listed) + console.log("bucket is null", (ctx.env as any).r2ChartConfigs === null) + console.log("has get get", "get" in (ctx.env as any).r2ChartConfigs) + console.log("r2", test) const url = new URL(request.url) const shouldCache = !url.searchParams.has("nocache") diff --git a/packages/@ourworldindata/types/src/domainTypes/Various.ts b/packages/@ourworldindata/types/src/domainTypes/Various.ts index 946339baa14..bc23e990f9d 100644 --- a/packages/@ourworldindata/types/src/domainTypes/Various.ts +++ b/packages/@ourworldindata/types/src/domainTypes/Various.ts @@ -65,3 +65,8 @@ export class JsonError extends Error { export interface QueryParams { [key: string]: string | undefined } + +export enum R2GrapherConfigDirectory { + byUUID = "config/by-uuid", + publishedGrapherBySlug = "grapher/by-slug", +} diff --git a/packages/@ourworldindata/types/src/index.ts b/packages/@ourworldindata/types/src/index.ts index 3c8e8af77d0..42cc9443868 100644 --- 
a/packages/@ourworldindata/types/src/index.ts +++ b/packages/@ourworldindata/types/src/index.ts @@ -18,6 +18,7 @@ export { type RawPageview, type UserCountryInformation, type QueryParams, + type R2GrapherConfigDirectory, } from "./domainTypes/Various.js" export { type BreadcrumbItem, type KeyValueProps } from "./domainTypes/Site.js" export { diff --git a/wrangler.toml b/wrangler.toml index fc2d7e14219..49263f75f47 100644 --- a/wrangler.toml +++ b/wrangler.toml @@ -13,6 +13,10 @@ MAILGUN_DOMAIN = "mg.ourworldindata.org" SLACK_ERROR_CHANNEL_ID = "C016H0BNNB1" ENV = "preview" +[[r2_buckets]] +binding = "r2ChartConfigs" +bucket_name = "owid-grapher-configs-staging" + # Overrides for CF production deployment [env.production] compatibility_date = "2024-04-29" @@ -25,8 +29,11 @@ bucket_name = "owid-grapher-configs" ENV = "production" MAILGUN_DOMAIN = "mg.ourworldindata.org" SLACK_ERROR_CHANNEL_ID = "C5JJW19PS" +GRAPHER_CONFIG_R2_BUCKET_PATH = "v1" -[[env.development.r2_buckets]] +[[env.preview.r2_buckets]] binding = "r2ChartConfigs" bucket_name = "owid-grapher-configs-staging" + +[env.preview.vars] From 577059074b2e658317c17f55ef35f663026c6b79 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Tue, 6 Aug 2024 20:26:56 +0200 Subject: [PATCH 19/26] =?UTF-8?q?=E2=9C=A8=20finish=20cf=20functions=20fet?= =?UTF-8?q?ching=20from=20R2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + adminSiteServer/apiRouter.ts | 2 +- devTools/syncGraphersToR2/syncGraphersToR2.ts | 2 +- functions/_common/grapherRenderer.ts | 28 +++++++++---------- functions/grapher/thumbnail/[slug].ts | 8 ------ package.json | 2 +- packages/@ourworldindata/types/src/index.ts | 2 +- wrangler.toml | 1 - 8 files changed, 19 insertions(+), 27 deletions(-) diff --git a/.gitignore b/.gitignore index 388475e6261..526e12ee4b2 100755 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,4 @@ dist/ .nx/workspace-data .dev.vars **/tsup.config.bundled*.mjs +cfstorage 
diff --git a/adminSiteServer/apiRouter.ts b/adminSiteServer/apiRouter.ts index 8b7ae33f20f..a60f472803d 100644 --- a/adminSiteServer/apiRouter.ts +++ b/adminSiteServer/apiRouter.ts @@ -85,6 +85,7 @@ import { DbInsertUser, FlatTagGraph, DbRawChartConfig, + R2GrapherConfigDirectory, } from "@ourworldindata/types" import { uuidv7 } from "uuidv7" import { @@ -158,7 +159,6 @@ import path from "path" import { deleteGrapherConfigFromR2, deleteGrapherConfigFromR2ByUUID, - R2GrapherConfigDirectory, saveGrapherConfigToR2, saveGrapherConfigToR2ByUUID, getMd5HashBase64, diff --git a/devTools/syncGraphersToR2/syncGraphersToR2.ts b/devTools/syncGraphersToR2/syncGraphersToR2.ts index b7a546b5f39..fbd948329fa 100644 --- a/devTools/syncGraphersToR2/syncGraphersToR2.ts +++ b/devTools/syncGraphersToR2/syncGraphersToR2.ts @@ -23,7 +23,6 @@ import { KnexReadonlyTransaction, knexReadonlyTransaction, } from "../../db/db.js" -import { R2GrapherConfigDirectory } from "../../adminSiteServer/chartConfigR2Helpers.js" import { base64ToBytes, bytesToBase64, @@ -32,6 +31,7 @@ import { excludeUndefined, HexString, hexToBytes, + R2GrapherConfigDirectory, } from "@ourworldindata/utils" import { string } from "ts-pattern/dist/patterns.js" import { chunk, take } from "lodash" diff --git a/functions/_common/grapherRenderer.ts b/functions/_common/grapherRenderer.ts index 7fdf9ed6065..06fa9e853ed 100644 --- a/functions/_common/grapherRenderer.ts +++ b/functions/_common/grapherRenderer.ts @@ -1,8 +1,9 @@ -import { Grapher, GrapherInterface } from "@ourworldindata/grapher" +import { Grapher } from "@ourworldindata/grapher" import { Bounds, - deserializeJSONFromHTML, excludeUndefined, + GrapherInterface, + R2GrapherConfigDirectory, } from "@ourworldindata/utils" import { svg2png, initialize as initializeSvg2Png } from "svg2png-wasm" import { TimeLogger } from "./timeLogger" @@ -16,7 +17,6 @@ import LatoMedium from "../_common/fonts/LatoLatin-Medium.ttf.bin" import LatoBold from 
"../_common/fonts/LatoLatin-Bold.ttf.bin" import PlayfairSemiBold from "../_common/fonts/PlayfairDisplayLatin-SemiBold.ttf.bin" import { Env } from "../grapher/thumbnail/[slug].js" -import { R2GrapherConfigDirectory } from "@ourworldindata/types" declare global { // eslint-disable-next-line no-var @@ -150,25 +150,21 @@ async function fetchAndRenderGrapherToSvg({ const url = new URL(`/grapher/${slug}`, env.url) const slugOnly = url.pathname.split("/").pop() - console.log("Fetching", url.href) const key = excludeUndefined([ env.GRAPHER_CONFIG_R2_BUCKET_PATH, - R2GrapherConfigDirectory.bySlug, + R2GrapherConfigDirectory.publishedGrapherBySlug, `${slugOnly}.json`, ]).join("/") - console.log("Fetching", key) + // Fetch grapher config + const fetchResponse = await env.r2ChartConfigs.get(key) - console.log("r2", env.r2ChartConfigs) - - // Fetch grapher config and extract it from the HTML - const grapherConfig: GrapherInterface = await env.r2ChartConfigs.get(key) - - if (!grapherConfig) { - throw new Error("Could not find grapher config") + if (!fetchResponse) { + return null } - grapherLogger.log("fetchGrapherConfig") + const grapherConfig: GrapherInterface = await fetchResponse.json() + console.log("grapher interface", grapherConfig) const bounds = new Bounds(0, 0, options.svgWidth, options.svgHeight) const grapher = new Grapher({ @@ -219,6 +215,10 @@ export const fetchAndRenderGrapher = async ( env, }) + if (!svg) { + return new Response("Not found", { status: 404 }) + } + switch (outType) { case "png": return png(await renderSvgToPng(svg, options)) diff --git a/functions/grapher/thumbnail/[slug].ts b/functions/grapher/thumbnail/[slug].ts index 6add785b1b2..5d003bcd876 100644 --- a/functions/grapher/thumbnail/[slug].ts +++ b/functions/grapher/thumbnail/[slug].ts @@ -34,14 +34,6 @@ router export const onRequestGet: PagesFunction = async (ctx) => { const { request, env } = ctx - const test = await (ctx.env as any).r2ChartConfigs.get( - 
"devs/daniel/grapher/by-slug/life-expectancy.json" - ) - const listed = await (ctx.env as any).r2ChartConfigs.list({ limit: 10 }) - console.log("listed", listed) - console.log("bucket is null", (ctx.env as any).r2ChartConfigs === null) - console.log("has get get", "get" in (ctx.env as any).r2ChartConfigs) - console.log("r2", test) const url = new URL(request.url) const shouldCache = !url.searchParams.has("nocache") diff --git a/package.json b/package.json index 20f8e503d94..7a5c7142f9b 100644 --- a/package.json +++ b/package.json @@ -26,7 +26,7 @@ "revertLastDbMigration": "tsx --tsconfig tsconfig.tsx.json node_modules/typeorm/cli.js migration:revert -d db/dataSource.ts", "startAdminServer": "node --enable-source-maps ./itsJustJavascript/adminSiteServer/app.js", "startAdminDevServer": "tsx watch --ignore '**.mjs' --tsconfig tsconfig.tsx.json adminSiteServer/app.tsx", - "startLocalCloudflareFunctions": "wrangler pages dev", + "startLocalCloudflareFunctions": "wrangler pages dev --local --persist-to ./cfstorage", "startDeployQueueServer": "node --enable-source-maps ./itsJustJavascript/baker/startDeployQueueServer.js", "startLernaWatcher": "lerna watch --scope '@ourworldindata/*' -- lerna run build --scope=\\$LERNA_PACKAGE_NAME --include-dependents", "startTmuxServer": "node_modules/tmex/tmex dev \"yarn startLernaWatcher\" \"yarn startAdminDevServer\" \"yarn startViteServer\"", diff --git a/packages/@ourworldindata/types/src/index.ts b/packages/@ourworldindata/types/src/index.ts index 42cc9443868..019d446f9ec 100644 --- a/packages/@ourworldindata/types/src/index.ts +++ b/packages/@ourworldindata/types/src/index.ts @@ -18,7 +18,7 @@ export { type RawPageview, type UserCountryInformation, type QueryParams, - type R2GrapherConfigDirectory, + R2GrapherConfigDirectory, } from "./domainTypes/Various.js" export { type BreadcrumbItem, type KeyValueProps } from "./domainTypes/Site.js" export { diff --git a/wrangler.toml b/wrangler.toml index 49263f75f47..ab8b57941aa 100644 --- 
a/wrangler.toml +++ b/wrangler.toml @@ -36,4 +36,3 @@ GRAPHER_CONFIG_R2_BUCKET_PATH = "v1" binding = "r2ChartConfigs" bucket_name = "owid-grapher-configs-staging" -[env.preview.vars] From 0042a26904b0b21f94016afe3aeeefc9fbb46118 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Wed, 7 Aug 2024 09:31:51 +0200 Subject: [PATCH 20/26] =?UTF-8?q?=F0=9F=94=A8=20add=20fallback=20to=20bran?= =?UTF-8?q?ch=20name=20as=20directory=20in=20R2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- functions/_common/grapherRenderer.ts | 14 ++++++++++++-- functions/grapher/thumbnail/[slug].ts | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/functions/_common/grapherRenderer.ts b/functions/_common/grapherRenderer.ts index 06fa9e853ed..ffe054581dc 100644 --- a/functions/_common/grapherRenderer.ts +++ b/functions/_common/grapherRenderer.ts @@ -150,12 +150,22 @@ async function fetchAndRenderGrapherToSvg({ const url = new URL(`/grapher/${slug}`, env.url) const slugOnly = url.pathname.split("/").pop() + + // The top level directory is either the bucket path (should be set in dev environments and production) + // or the branch name on preview staging environments + console.log("branch", env.CF_PAGES_BRANCH) + const topLevelDirectory = env.GRAPHER_CONFIG_R2_BUCKET_PATH + ? 
[env.GRAPHER_CONFIG_R2_BUCKET_PATH] + : ["by-branch", env.CF_PAGES_BRANCH] + const key = excludeUndefined([ - env.GRAPHER_CONFIG_R2_BUCKET_PATH, + ...topLevelDirectory, R2GrapherConfigDirectory.publishedGrapherBySlug, `${slugOnly}.json`, ]).join("/") + console.log("fetching grapher config from this key", key) + // Fetch grapher config const fetchResponse = await env.r2ChartConfigs.get(key) @@ -164,7 +174,7 @@ async function fetchAndRenderGrapherToSvg({ } const grapherConfig: GrapherInterface = await fetchResponse.json() - console.log("grapher interface", grapherConfig) + console.log("grapher title", grapherConfig.title) const bounds = new Bounds(0, 0, options.svgWidth, options.svgHeight) const grapher = new Grapher({ diff --git a/functions/grapher/thumbnail/[slug].ts b/functions/grapher/thumbnail/[slug].ts index 5d003bcd876..a62cb8c8d17 100644 --- a/functions/grapher/thumbnail/[slug].ts +++ b/functions/grapher/thumbnail/[slug].ts @@ -10,6 +10,7 @@ export interface Env { } url: URL GRAPHER_CONFIG_R2_BUCKET_PATH: string + CF_PAGES_BRANCH: string ENV: string } From 72aa0f218c74824421d7382523a4b0cff0dc56f2 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Tue, 13 Aug 2024 18:12:03 +0200 Subject: [PATCH 21/26] =?UTF-8?q?=F0=9F=93=9C=20update=20readme?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- functions/README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/functions/README.md b/functions/README.md index caaa61334a0..6fdf5ca8bf4 100644 --- a/functions/README.md +++ b/functions/README.md @@ -10,6 +10,8 @@ Pages Functions are very similar to Cloudflare Workers; however they will always Pages Functions use file-based routing, which means that the file `grapher/[slug].ts` will serve routes like `/grapher/child-mortality`. In addition, there's a [`_routes.json`](../_routes.json) file that specifies which routes are to be served dynamically. 
+Inside a file-based route we sometimes use an instance of itty-router to decide on the exact functionality to provide (e.g. png vs svg generation) + ## Development 1. Copy `.dev.vars.example` to `.dev.vars` and fill in the required variables. @@ -28,7 +30,9 @@ Note: compatibility dates between local development, production and preview envi ## Testing on Fondation staging sites vs Cloudfare previews -`yarn deployContentPreview` deploys the staging `bakedSite` to a Cloudflare preview at https://[PREVIEW_BRANCH].owid-staging.pages.dev. This is the recommended way to test functions in a production-like environment. See [../ops/buildkite/deploy-content-preview](../ops/buildkite/deploy-content-preview) for more details. +We have two cloudflare projects set up that you can deploy previews to. `owid` which is also where our production deployment runs, and `owid-staging`. Currently, `owid` is configured to require authentication while `owid-staging` is accessible from the internet without any kind of auth. + +`yarn deployContentPreview` deploys the staging `bakedSite` to a Cloudflare preview at https://[PREVIEW_BRANCH].[PROJECT].pages.dev. This is the recommended way to test functions in a production-like environment. See [../ops/buildkite/deploy-content-preview](../ops/buildkite/deploy-content-preview) for more details. ### Rationale @@ -36,7 +40,7 @@ A custom staging site is available at http://staging-site-[BRANCH] upon pushing When it comes to testing functions in a production-like environment, Cloudflare previews are recommended. -Cloudflare previews are served by Cloudflare (as opposed to `wrangler` on staging sites) and are available at https://[RANDOM_ID].owid-staging.pages.dev. Cloudflare previews do not rely on the `wrangler` CLI and its `.dev.vars` file. 
Instead, they use the [Cloudflare dashboard to configure environment variables](https://dash.cloudflare.com/078fcdfed9955087315dd86792e71a7e/pages/view/owid/settings/environment-variables), in the same way and place as the production site. +Cloudflare previews are served by Cloudflare (as opposed to `wrangler` on staging sites) and are available at https://[RANDOM_ID].[PROJECT].pages.dev. Cloudflare previews do not rely on the `wrangler` CLI and its `.dev.vars` file, but they do take the `wrangler.toml` file into account for environment variables. For secrets, they use the [values set via the Cloudflare dashboard](https://dash.cloudflare.com/078fcdfed9955087315dd86792e71a7e/pages/view/owid/settings/environment-variables), in the same way and place as the production site. This proximity of configurations in the Cloudflare dashboard makes spotting differences between production and preview environments easier - and is one of the reason of using Cloudflare previews in the same project (owid) over using a new project specific to staging. From 3177d4a74659bc2a72a527b28c8a3d883979e389 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Mon, 12 Aug 2024 22:35:09 +0200 Subject: [PATCH 22/26] Create a PR that changes the script in ./devTools/syncGraphersToR2 so that it supports multiple commands Add support for multiple commands in `syncGraphersToR2.ts` script. * **Refactor existing implementation:** - Move the existing implementation to the "sync" subcommand. - Update the `main` function to handle the new subcommand structure. - Parse the command-line arguments to determine which subcommand to execute. * **Add new subcommand:** - Add a new "store-dev-by-slug" subcommand with a dummy implementation. - Add a help message for the new subcommand. --- For more details, open the [Copilot Workspace session](https://copilot-workspace.githubnext.com/owid/owid-grapher?shareId=XXXX-XXXX-XXXX-XXXX). 
--- devTools/syncGraphersToR2/syncGraphersToR2.ts | 34 +++++++++++++++++-- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/devTools/syncGraphersToR2/syncGraphersToR2.ts b/devTools/syncGraphersToR2/syncGraphersToR2.ts index fbd948329fa..e57df4324d1 100644 --- a/devTools/syncGraphersToR2/syncGraphersToR2.ts +++ b/devTools/syncGraphersToR2/syncGraphersToR2.ts @@ -193,7 +193,7 @@ async function syncWithR2( } } -async function main(parsedArgs: parseArgs.ParsedArgs, dryRun: boolean) { +async function sync(parsedArgs: parseArgs.ParsedArgs, dryRun: boolean) { if ( GRAPHER_CONFIG_R2_BUCKET === undefined || GRAPHER_CONFIG_R2_BUCKET_PATH === undefined @@ -279,13 +279,41 @@ async function main(parsedArgs: parseArgs.ParsedArgs, dryRun: boolean) { }) } +async function storeDevBySlug(parsedArgs: parseArgs.ParsedArgs, dryRun: boolean) { + console.log("Dummy implementation for store-dev-by-slug") +} + +async function main(parsedArgs: parseArgs.ParsedArgs) { + const dryRun = parsedArgs["dry-run"] + + const command = parsedArgs._[0] + + switch (command) { + case "sync": + await sync(parsedArgs, dryRun) + break + case "store-dev-by-slug": + await storeDevBySlug(parsedArgs, dryRun) + break + default: + console.log( + `Unknown command: ${command}\n\nAvailable commands:\n sync\n store-dev-by-slug` + ) + break + } +} + const parsedArgs = parseArgs(process.argv.slice(2)) if (parsedArgs["h"]) { console.log( `syncGraphersToR2.js - sync grapher configs from the chart_configs table to R2 ---dry-run: Don't make any actual changes to R2` +--dry-run: Don't make any actual changes to R2 + +Commands: + sync: Sync grapher configs to R2 + store-dev-by-slug: Dummy implementation for store-dev-by-slug` ) } else { - main(parsedArgs, parsedArgs["dry-run"]) + main(parsedArgs) } From cc543172e775b06c5c19a95bd682bf648a04b880 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Tue, 13 Aug 2024 18:18:51 +0200 Subject: [PATCH 23/26] =?UTF-8?q?=F0=9F=90=9D=20reformat,=20make=20sure=20?= 
=?UTF-8?q?process=20exits?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- devTools/syncGraphersToR2/syncGraphersToR2.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/devTools/syncGraphersToR2/syncGraphersToR2.ts b/devTools/syncGraphersToR2/syncGraphersToR2.ts index e57df4324d1..52df77a92f6 100644 --- a/devTools/syncGraphersToR2/syncGraphersToR2.ts +++ b/devTools/syncGraphersToR2/syncGraphersToR2.ts @@ -279,7 +279,10 @@ async function sync(parsedArgs: parseArgs.ParsedArgs, dryRun: boolean) { }) } -async function storeDevBySlug(parsedArgs: parseArgs.ParsedArgs, dryRun: boolean) { +async function storeDevBySlug( + parsedArgs: parseArgs.ParsedArgs, + dryRun: boolean +) { console.log("Dummy implementation for store-dev-by-slug") } @@ -301,6 +304,7 @@ async function main(parsedArgs: parseArgs.ParsedArgs) { ) break } + process.exit(0) } const parsedArgs = parseArgs(process.argv.slice(2)) From bbf9268f7635aa194b94fa460f06a038aad3bc08 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Tue, 13 Aug 2024 16:45:54 +0200 Subject: [PATCH 24/26] Add storing single charts to local R2 bucket to syncGraphersToR2 tool The syncGraphersToR2 tool lets you sync all needed entries from the chart_configs table to an actual R2 bucket. This is useful for production and staging servers but in local dev the R2 bindings currently can't connect to an actual R2 bucket and instead simulate one using a local folder. This PR adds a tool to store single chart configs by slug from the database into the local R2 bucket by invoking the `wrangler` cli tool for storing R2 objects. --- For more details, open the [Copilot Workspace session](https://copilot-workspace.githubnext.com/owid/owid-grapher?shareId=XXXX-XXXX-XXXX-XXXX).
--- devTools/syncGraphersToR2/syncGraphersToR2.ts | 40 ++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/devTools/syncGraphersToR2/syncGraphersToR2.ts b/devTools/syncGraphersToR2/syncGraphersToR2.ts index 52df77a92f6..7a084d94b36 100644 --- a/devTools/syncGraphersToR2/syncGraphersToR2.ts +++ b/devTools/syncGraphersToR2/syncGraphersToR2.ts @@ -36,6 +36,8 @@ import { import { string } from "ts-pattern/dist/patterns.js" import { chunk, take } from "lodash" import ProgressBar from "progress" +import { getEnrichedChartBySlug } from "../../db/model/Chart.js" +import { exec } from "child_process" type HashAndId = Pick @@ -283,7 +285,43 @@ async function storeDevBySlug( parsedArgs: parseArgs.ParsedArgs, dryRun: boolean ) { - console.log("Dummy implementation for store-dev-by-slug") + const slug = parsedArgs._[1] + if (!slug) { + console.error("No slug provided") + return + } + + await knexReadonlyTransaction(async (trx) => { + const chart = await knexRawFirst( + trx, + `SELECT full FROM chart_configs WHERE slug = ?`, + [slug] + ) + if (!chart) { + console.error(`No chart found for slug ${slug}`) + return + } + + const fullConfig = JSON.stringify(chart.full) + const command = `npx wrangler r2 object put --local $GRAPHER_CONFIG_R2_BUCKET/$GRAPHER_CONFIG_R2_BUCKET_PATH/grapher/by-slug/${slug}.json --pipe --content-type application/json --persist-to ./cfstorage` + + const process = exec(command, (error, stdout, stderr) => { + if (error) { + console.error( + `Error executing wrangler command: ${error.message}` + ) + return + } + if (stderr) { + console.error(`Wrangler stderr: ${stderr}`) + return + } + console.log(`Wrangler stdout: ${stdout}`) + }) + + process.stdin.write(fullConfig) + process.stdin.end() + }) } async function main(parsedArgs: parseArgs.ParsedArgs) { From 424faeabffc41f04ad78fed1511d7c8b822967c9 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Tue, 13 Aug 2024 18:09:54 +0200 Subject: [PATCH 25/26] 
=?UTF-8?q?=F0=9F=94=A8=20make=20implementation=20wo?= =?UTF-8?q?rk?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- devTools/syncGraphersToR2/syncGraphersToR2.ts | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/devTools/syncGraphersToR2/syncGraphersToR2.ts b/devTools/syncGraphersToR2/syncGraphersToR2.ts index 7a084d94b36..a34f011e27c 100644 --- a/devTools/syncGraphersToR2/syncGraphersToR2.ts +++ b/devTools/syncGraphersToR2/syncGraphersToR2.ts @@ -3,7 +3,6 @@ import parseArgs from "minimist" import { DeleteObjectCommand, DeleteObjectCommandInput, - ListObjectsCommand, ListObjectsV2Command, ListObjectsV2CommandOutput, PutObjectCommand, @@ -20,23 +19,20 @@ import { } from "../../settings/serverSettings.js" import { knexRaw, + knexRawFirst, KnexReadonlyTransaction, knexReadonlyTransaction, } from "../../db/db.js" import { - base64ToBytes, bytesToBase64, DbRawChartConfig, - differenceOfSets, excludeUndefined, HexString, hexToBytes, R2GrapherConfigDirectory, } from "@ourworldindata/utils" -import { string } from "ts-pattern/dist/patterns.js" -import { chunk, take } from "lodash" +import { chunk } from "lodash" import ProgressBar from "progress" -import { getEnrichedChartBySlug } from "../../db/model/Chart.js" import { exec } from "child_process" type HashAndId = Pick @@ -303,7 +299,7 @@ async function storeDevBySlug( } const fullConfig = JSON.stringify(chart.full) - const command = `npx wrangler r2 object put --local $GRAPHER_CONFIG_R2_BUCKET/$GRAPHER_CONFIG_R2_BUCKET_PATH/grapher/by-slug/${slug}.json --pipe --content-type application/json --persist-to ./cfstorage` + const command = `npx wrangler r2 object put --local ${GRAPHER_CONFIG_R2_BUCKET}/${GRAPHER_CONFIG_R2_BUCKET_PATH}/${R2GrapherConfigDirectory.publishedGrapherBySlug}/${slug}.json --pipe --content-type application/json --persist-to ./cfstorage` const process = exec(command, (error, stdout, stderr) => { if (error) { @@ -319,8 
+315,10 @@ async function storeDevBySlug( console.log(`Wrangler stdout: ${stdout}`) }) - process.stdin.write(fullConfig) - process.stdin.end() + if (process.stdin) { + process.stdin.write(fullConfig) + process.stdin.end() + } }) } From 13bceeb29eb15eabb983b488d0f93c8f279db3e7 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Tue, 13 Aug 2024 20:29:09 +0200 Subject: [PATCH 26/26] =?UTF-8?q?=F0=9F=94=A8=20add=20readme,=20tweak=20sc?= =?UTF-8?q?ript?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- devTools/syncGraphersToR2/README.md | 19 +++++++++++++++++++ devTools/syncGraphersToR2/syncGraphersToR2.ts | 5 +++-- 2 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 devTools/syncGraphersToR2/README.md diff --git a/devTools/syncGraphersToR2/README.md b/devTools/syncGraphersToR2/README.md new file mode 100644 index 00000000000..649235d35be --- /dev/null +++ b/devTools/syncGraphersToR2/README.md @@ -0,0 +1,19 @@ +# syncGraphersToR2 + +This script, `syncGraphersToR2.ts`, is used to sync grapher configurations from the `chart_configs` table to R2 storage. It supports different commands to perform specific tasks. + +## Available Commands + +- `sync`: Sync all grapher configs from the DB to R2 buckets, both upserting into R2 and deleting obsolete ones from R2. This command is useful for production if the R2 storage should get out of sync with the database and/or to initially fill R2. It can't be used to fill local development R2 buckets. +- `store-dev-by-slug`: Fetch a grapher config by slug from the `chart_configs` table and store it in the local dev R2 storage. This is useful for your local dev environment when you want to test the CF Pages Functions that need R2 files to exist. CF Pages Functions using R2 bindings can (as of 2024-08-13) not access real remote R2 buckets. 
+ +## Usage + +To run the script, use the following command: + +```sh +yarn syncGraphersToR2 [command] [options] +``` + +Options +--dry-run: Don't make any actual changes to R2. diff --git a/devTools/syncGraphersToR2/syncGraphersToR2.ts b/devTools/syncGraphersToR2/syncGraphersToR2.ts index a34f011e27c..6157b5bb588 100644 --- a/devTools/syncGraphersToR2/syncGraphersToR2.ts +++ b/devTools/syncGraphersToR2/syncGraphersToR2.ts @@ -288,9 +288,10 @@ async function storeDevBySlug( } await knexReadonlyTransaction(async (trx) => { + // Fetch the chart config from the DB by slug const chart = await knexRawFirst( trx, - `SELECT full FROM chart_configs WHERE slug = ?`, + `SELECT full FROM chart_configs WHERE slug = ? and full ->> '$.isPublished' = "true"`, [slug] ) if (!chart) { @@ -352,7 +353,7 @@ if (parsedArgs["h"]) { Commands: sync: Sync grapher configs to R2 - store-dev-by-slug: Dummy implementation for store-dev-by-slug` + store-dev-by-slug: Fetch a grapher config by slug from the chart_configs table and store it the local dev R2 storage` ) } else { main(parsedArgs)