diff --git a/.env.devcontainer b/.env.devcontainer index 7bc29cc6500..dc8a8bd8d54 100644 --- a/.env.devcontainer +++ b/.env.devcontainer @@ -16,8 +16,8 @@ GDOCS_CLIENT_ID='' GDOCS_BASIC_ARTICLE_TEMPLATE_URL='' GDOCS_SHARED_DRIVE_ID='' -IMAGE_HOSTING_R2_ENDPOINT='' +R2_ENDPOINT='' IMAGE_HOSTING_R2_CDN_URL='' IMAGE_HOSTING_R2_BUCKET_PATH='' -IMAGE_HOSTING_R2_ACCESS_KEY_ID='' -IMAGE_HOSTING_R2_SECRET_ACCESS_KEY='' +R2_ACCESS_KEY_ID='' +R2_SECRET_ACCESS_KEY='' diff --git a/.env.example-full b/.env.example-full index 8407d05b69b..d0cb5063adf 100644 --- a/.env.example-full +++ b/.env.example-full @@ -22,11 +22,17 @@ GDOCS_BASIC_ARTICLE_TEMPLATE_URL= GDOCS_SHARED_DRIVE_ID= GDOCS_DONATE_FAQS_DOCUMENT_ID= # optional -IMAGE_HOSTING_R2_ENDPOINT= # optional +R2_ENDPOINT= # optional IMAGE_HOSTING_R2_CDN_URL= IMAGE_HOSTING_R2_BUCKET_PATH= -IMAGE_HOSTING_R2_ACCESS_KEY_ID= # optional -IMAGE_HOSTING_R2_SECRET_ACCESS_KEY= # optional +R2_ACCESS_KEY_ID= # optional +R2_SECRET_ACCESS_KEY= # optional +# These two GRAPHER_CONFIG_ settings are used to store grapher configs in an R2 bucket. +# The cloudflare workers for thumbnail rendering etc use these settings to fetch the grapher configs. +# This means that for most local dev it is not necessary to set these. 
+GRAPHER_CONFIG_R2_BUCKET= # optional - for local dev set it to "owid-grapher-configs-staging" +GRAPHER_CONFIG_R2_BUCKET_PATH= # optional - for local dev set it to "devs/YOURNAME" + OPENAI_API_KEY= diff --git a/.gitignore b/.gitignore index 388475e6261..e33cb39db80 100755 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,4 @@ dist/ .nx/workspace-data .dev.vars **/tsup.config.bundled*.mjs +cfstorage/ diff --git a/adminSiteServer/apiRouter.ts b/adminSiteServer/apiRouter.ts index c5096c16b24..8b7ae33f20f 100644 --- a/adminSiteServer/apiRouter.ts +++ b/adminSiteServer/apiRouter.ts @@ -155,6 +155,14 @@ import { GdocDataInsight } from "../db/model/Gdoc/GdocDataInsight.js" import { GdocHomepage } from "../db/model/Gdoc/GdocHomepage.js" import { GdocAuthor } from "../db/model/Gdoc/GdocAuthor.js" import path from "path" +import { + deleteGrapherConfigFromR2, + deleteGrapherConfigFromR2ByUUID, + R2GrapherConfigDirectory, + saveGrapherConfigToR2, + saveGrapherConfigToR2ByUUID, + getMd5HashBase64, +} from "./chartConfigR2Helpers.js" const apiRouter = new FunctionalRouter() @@ -275,7 +283,7 @@ const expectChartById = async ( const saveNewChart = async ( knex: db.KnexReadWriteTransaction, { config, user }: { config: GrapherInterface; user: DbPlainUser } -): Promise => { +): Promise<{ patchConfig: GrapherInterface; fullConfig: GrapherInterface }> => { // if the schema version is missing, assume it's the latest if (!config["$schema"]) { config["$schema"] = defaultGrapherConfig["$schema"] @@ -285,16 +293,25 @@ const saveNewChart = async ( const parentConfig = defaultGrapherConfig const patchConfig = diffGrapherConfigs(config, parentConfig) const fullConfig = mergeGrapherConfigs(parentConfig, patchConfig) + const fullConfigStringified = JSON.stringify(fullConfig) + + // compute a base64-encoded MD5 hash of the full config + const fullConfigMd5 = await getMd5HashBase64(fullConfigStringified) // insert patch & full configs into the chart_configs table - const configId = uuidv7() + const chartConfigId 
= uuidv7() await db.knexRaw( knex, `-- sql - INSERT INTO chart_configs (id, patch, full) - VALUES (?, ?, ?) + INSERT INTO chart_configs (id, patch, full, fullMd5) + VALUES (?, ?, ?, ?) `, - [configId, JSON.stringify(patchConfig), JSON.stringify(fullConfig)] + [ + chartConfigId, + JSON.stringify(patchConfig), + fullConfigStringified, + fullConfigMd5, + ] ) // add a new chart to the charts table @@ -304,7 +321,7 @@ const saveNewChart = async ( INSERT INTO charts (configId, lastEditedAt, lastEditedByUserId) VALUES (?, ?, ?) `, - [configId, new Date(), user.id] + [chartConfigId, new Date(), user.id] ) // The chart config itself has an id field that should store the id of the chart - update the chart now so this is true @@ -324,7 +341,9 @@ const saveNewChart = async ( [chartId, chartId, chartId] ) - return patchConfig + await saveGrapherConfigToR2ByUUID(chartConfigId, fullConfigStringified) + + return { patchConfig, fullConfig } } const updateExistingChart = async ( @@ -334,7 +353,7 @@ const updateExistingChart = async ( user, chartId, }: { config: GrapherInterface; user: DbPlainUser; chartId: number } -): Promise => { +): Promise<{ patchConfig: GrapherInterface; fullConfig: GrapherInterface }> => { // make sure that the id of the incoming config matches the chart id config.id = chartId @@ -347,19 +366,36 @@ const updateExistingChart = async ( const parentConfig = defaultGrapherConfig const patchConfig = diffGrapherConfigs(config, parentConfig) const fullConfig = mergeGrapherConfigs(parentConfig, patchConfig) + const fullConfigStringified = JSON.stringify(fullConfig) + + const fullConfigMd5 = await getMd5HashBase64(fullConfigStringified) + + const chartConfigId = await db.knexRawFirst>( + knex, + `SELECT configId FROM charts WHERE id = ?`, + [chartId] + ) + + if (!chartConfigId) + throw new JsonError(`No chart config found for id ${chartId}`, 404) // update configs await db.knexRaw( knex, `-- sql - UPDATE chart_configs cc - JOIN charts c ON c.configId = cc.id + UPDATE 
chart_configs SET - cc.patch=?, - cc.full=? - WHERE c.id = ? + patch=?, + full=?, + fullMd5=? + WHERE id = ? `, - [JSON.stringify(patchConfig), JSON.stringify(fullConfig), chartId] + [ + JSON.stringify(patchConfig), + fullConfigStringified, + fullConfigMd5, + chartConfigId.configId, + ] ) // update charts row @@ -373,7 +409,12 @@ const updateExistingChart = async ( [new Date(), user.id, chartId] ) - return patchConfig + await saveGrapherConfigToR2ByUUID( + chartConfigId.configId, + fullConfigStringified + ) + + return { patchConfig, fullConfig } } const saveGrapher = async ( @@ -443,6 +484,11 @@ const saveGrapher = async ( `INSERT INTO chart_slug_redirects (chart_id, slug) VALUES (?, ?)`, [existingConfig.id, existingConfig.slug] ) + // When we rename grapher configs, make sure to delete the old one (the new one will be saved below) + await deleteGrapherConfigFromR2( + R2GrapherConfigDirectory.publishedGrapherBySlug, + `${existingConfig.slug}.json` + ) } } @@ -457,20 +503,27 @@ const saveGrapher = async ( // Execute the actual database update or creation let chartId: number + let patchConfig: GrapherInterface + let fullConfig: GrapherInterface if (existingConfig) { chartId = existingConfig.id! - newConfig = await updateExistingChart(knex, { + const configs = await updateExistingChart(knex, { config: newConfig, user, chartId, }) + patchConfig = configs.patchConfig + fullConfig = configs.fullConfig } else { - newConfig = await saveNewChart(knex, { + const configs = await saveNewChart(knex, { config: newConfig, user, }) - chartId = newConfig.id! + patchConfig = configs.patchConfig + fullConfig = configs.fullConfig + chartId = fullConfig.id! 
} + newConfig = patchConfig // Record this change in version history const chartRevisionLog = { @@ -515,6 +568,17 @@ const saveGrapher = async ( newDimensions.map((d) => d.variableId) ) + if (newConfig.isPublished) { + const configStringified = JSON.stringify(fullConfig) + const configMd5 = await getMd5HashBase64(configStringified) + await saveGrapherConfigToR2( + configStringified, + R2GrapherConfigDirectory.publishedGrapherBySlug, + `${newConfig.slug}.json`, + configMd5 + ) + } + if ( newConfig.isPublished && (!existingConfig || !existingConfig.isPublished) @@ -537,6 +601,10 @@ const saveGrapher = async ( `DELETE FROM chart_slug_redirects WHERE chart_id = ?`, [existingConfig.id] ) + await deleteGrapherConfigFromR2( + R2GrapherConfigDirectory.publishedGrapherBySlug, + `${existingConfig.slug}.json` + ) await triggerStaticBuild(user, `Unpublishing chart ${newConfig.slug}`) } else if (newConfig.isPublished) await triggerStaticBuild(user, `Updating chart ${newConfig.slug}`) @@ -883,11 +951,13 @@ deleteRouteWithRWTransaction( [chart.id] ) - const row = await db.knexRawFirst<{ configId: number }>( + const row = await db.knexRawFirst>( trx, `SELECT configId FROM charts WHERE id = ?`, [chart.id] ) + if (!row) + throw new JsonError(`No chart config found for id ${chart.id}`, 404) if (row) { await db.knexRaw(trx, `DELETE FROM charts WHERE id=?`, [chart.id]) await db.knexRaw(trx, `DELETE FROM chart_configs WHERE id=?`, [ @@ -901,6 +971,13 @@ deleteRouteWithRWTransaction( `Deleting chart ${chart.slug}` ) + await deleteGrapherConfigFromR2ByUUID(row.configId) + if (chart.isPublished) + await deleteGrapherConfigFromR2( + R2GrapherConfigDirectory.publishedGrapherBySlug, + `${chart.slug}.json` + ) + return { success: true } } ) diff --git a/adminSiteServer/chartConfigR2Helpers.ts b/adminSiteServer/chartConfigR2Helpers.ts new file mode 100644 index 00000000000..781fbd233fb --- /dev/null +++ b/adminSiteServer/chartConfigR2Helpers.ts @@ -0,0 +1,173 @@ +import { + 
GRAPHER_CONFIG_R2_BUCKET, + GRAPHER_CONFIG_R2_BUCKET_PATH, + R2_ACCESS_KEY_ID, + R2_ENDPOINT, + R2_REGION, + R2_SECRET_ACCESS_KEY, +} from "../settings/serverSettings.js" +import { + DeleteObjectCommand, + DeleteObjectCommandInput, + PutObjectCommand, + PutObjectCommandInput, + S3Client, +} from "@aws-sdk/client-s3" +import { Base64String, JsonError } from "@ourworldindata/utils" +import { logErrorAndMaybeSendToBugsnag } from "../serverUtils/errorLog.js" +import { createHash } from "crypto" + +export function getMd5HashBase64(data: string): Base64String { + // I would have liked to create a function in utils that can compute a variety of hashes + // in the browser, CF workers and node but unfortunately this isn't easily possible + // for md5 - so here we just special case for md5, node and base64 encoding for now. + return createHash("md5") + .update(data, "utf-8") + .digest("base64") as Base64String +} +export enum R2GrapherConfigDirectory { + byUUID = "config/by-uuid", + publishedGrapherBySlug = "grapher/by-slug", +} + +let s3Client: S3Client | undefined = undefined + +export async function saveGrapherConfigToR2ByUUID( + id: string, + chartConfigStringified: string +) { + const configMd5 = await getMd5HashBase64(chartConfigStringified) + + await saveGrapherConfigToR2( + chartConfigStringified, + R2GrapherConfigDirectory.byUUID, + `${id}.json`, + configMd5 + ) +} + +export async function deleteGrapherConfigFromR2ByUUID(id: string) { + await deleteGrapherConfigFromR2( + R2GrapherConfigDirectory.byUUID, + `${id}.json` + ) +} + +export async function saveGrapherConfigToR2( + config_stringified: string, + directory: R2GrapherConfigDirectory, + filename: string, + configMd5: Base64String +) { + if (process.env.NODE_ENV === "test") { + console.log("Skipping saving grapher config to R2 in test environment") + return + } + if ( + GRAPHER_CONFIG_R2_BUCKET === undefined || + GRAPHER_CONFIG_R2_BUCKET_PATH === undefined + ) { + console.info( + "R2 bucket not configured, 
not storing grapher config to R2" + ) + return + } + try { + if (!s3Client) { + s3Client = new S3Client({ + endpoint: R2_ENDPOINT, + forcePathStyle: false, + region: R2_REGION, + credentials: { + accessKeyId: R2_ACCESS_KEY_ID, + secretAccessKey: R2_SECRET_ACCESS_KEY, + }, + }) + } + + if (!GRAPHER_CONFIG_R2_BUCKET || !GRAPHER_CONFIG_R2_BUCKET_PATH) { + throw new Error("R2 bucket not configured") + } + + const bucket = GRAPHER_CONFIG_R2_BUCKET + const path = [GRAPHER_CONFIG_R2_BUCKET_PATH, directory, filename].join( + "/" + ) + + const MIMEType = "application/json" + + const params: PutObjectCommandInput = { + Bucket: bucket, + Key: path, + Body: config_stringified, + ContentType: MIMEType, + ContentMD5: configMd5, + } + + await s3Client.send(new PutObjectCommand(params)) + console.log( + `Successfully uploaded object: ${params.Bucket}/${params.Key}` + ) + } catch (err) { + await logErrorAndMaybeSendToBugsnag(err) + throw new JsonError( + `Failed to save the grapher config to R2. Inner error: ${err}` + ) + } +} + +export async function deleteGrapherConfigFromR2( + directory: R2GrapherConfigDirectory, + filename: string +) { + if (process.env.NODE_ENV === "test") { + console.log("Skipping saving grapher config to R2 in test environment") + return + } + if ( + GRAPHER_CONFIG_R2_BUCKET === undefined || + GRAPHER_CONFIG_R2_BUCKET_PATH === undefined + ) { + console.info( + "R2 bucket not configured, not deleting grapher config to R2" + ) + return + } + try { + if (!s3Client) { + s3Client = new S3Client({ + endpoint: R2_ENDPOINT, + forcePathStyle: false, + region: R2_REGION, + credentials: { + accessKeyId: R2_ACCESS_KEY_ID, + secretAccessKey: R2_SECRET_ACCESS_KEY, + }, + }) + } + + if (!GRAPHER_CONFIG_R2_BUCKET || !GRAPHER_CONFIG_R2_BUCKET_PATH) { + throw new Error("R2 bucket not configured") + } + + const bucket = GRAPHER_CONFIG_R2_BUCKET + const path = [GRAPHER_CONFIG_R2_BUCKET_PATH, directory, filename].join( + "/" + ) + + const params: DeleteObjectCommandInput = { + 
Bucket: bucket, + Key: path, + } + + await s3Client.send(new DeleteObjectCommand(params)) + console.log( + `Successfully deleted object: ${params.Bucket}/${params.Key}` + ) + } catch (err) { + await logErrorAndMaybeSendToBugsnag(err) + throw new JsonError( + `Failed to delete the grapher config to R2 at ${directory}/${filename}. Inner error: ${err}` + ) + } +} diff --git a/db/migration/1722415645057-AddChartConfigHash.ts b/db/migration/1722415645057-AddChartConfigHash.ts new file mode 100644 index 00000000000..8885900a088 --- /dev/null +++ b/db/migration/1722415645057-AddChartConfigHash.ts @@ -0,0 +1,22 @@ +import { MigrationInterface, QueryRunner } from "typeorm" + +export class AddChartConfigHash1722415645057 implements MigrationInterface { + public async up(queryRunner: QueryRunner): Promise { + await queryRunner.query(` + ALTER TABLE chart_configs + ADD COLUMN fullMd5 CHAR(24); + `) + + await queryRunner.query(` + UPDATE chart_configs + SET fullMd5 = to_base64(unhex(md5(full))) + `) + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.query(` + ALTER TABLE chart_configs + DROP COLUMN fullMd5; + `) + } +} diff --git a/db/model/Image.ts b/db/model/Image.ts index 60799fc9782..7042f529eca 100644 --- a/db/model/Image.ts +++ b/db/model/Image.ts @@ -21,10 +21,10 @@ import { } from "@ourworldindata/utils" import { OwidGoogleAuth } from "../OwidGoogleAuth.js" import { - IMAGE_HOSTING_R2_ENDPOINT, - IMAGE_HOSTING_R2_ACCESS_KEY_ID, - IMAGE_HOSTING_R2_SECRET_ACCESS_KEY, - IMAGE_HOSTING_R2_REGION, + R2_ENDPOINT, + R2_ACCESS_KEY_ID, + R2_SECRET_ACCESS_KEY, + R2_REGION, IMAGE_HOSTING_R2_BUCKET_PATH, GDOCS_CLIENT_EMAIL, GDOCS_SHARED_DRIVE_ID, @@ -139,12 +139,12 @@ class ImageStore { export const imageStore = new ImageStore() export const s3Client = new S3Client({ - endpoint: IMAGE_HOSTING_R2_ENDPOINT, + endpoint: R2_ENDPOINT, forcePathStyle: false, - region: IMAGE_HOSTING_R2_REGION, + region: R2_REGION, credentials: { - accessKeyId: 
IMAGE_HOSTING_R2_ACCESS_KEY_ID, - secretAccessKey: IMAGE_HOSTING_R2_SECRET_ACCESS_KEY, + accessKeyId: R2_ACCESS_KEY_ID, + secretAccessKey: R2_SECRET_ACCESS_KEY, }, }) diff --git a/devTools/syncGraphersToR2/syncGraphersToR2.ts b/devTools/syncGraphersToR2/syncGraphersToR2.ts new file mode 100644 index 00000000000..bedbc1d722d --- /dev/null +++ b/devTools/syncGraphersToR2/syncGraphersToR2.ts @@ -0,0 +1,292 @@ +import fs from "fs-extra" +import parseArgs from "minimist" +import { + DeleteObjectCommand, + DeleteObjectCommandInput, + ListObjectsCommand, + ListObjectsV2Command, + ListObjectsV2CommandOutput, + PutObjectCommand, + PutObjectCommandInput, + S3Client, +} from "@aws-sdk/client-s3" +import { + GRAPHER_CONFIG_R2_BUCKET, + GRAPHER_CONFIG_R2_BUCKET_PATH, + R2_ACCESS_KEY_ID, + R2_ENDPOINT, + R2_REGION, + R2_SECRET_ACCESS_KEY, +} from "../../settings/serverSettings.js" +import { + knexRaw, + KnexReadonlyTransaction, + knexReadonlyTransaction, +} from "../../db/db.js" +import { R2GrapherConfigDirectory } from "../../adminSiteServer/chartConfigR2Helpers.js" +import { + base64ToBytes, + bytesToBase64, + DbRawChartConfig, + differenceOfSets, + excludeUndefined, + HexString, + hexToBytes, +} from "@ourworldindata/utils" +import { string } from "ts-pattern/dist/patterns.js" +import { chunk, take } from "lodash" +import ProgressBar from "progress" + +type HashAndId = Pick + +/** Sync a set of chart configs with R2. Pass in a map of the keys to their md5 hashes and UUIDs + and this function will upsert all missing/outdated ones and delete any that are no longer needed. + + @param s3Client The S3 client to use + @param pathPrefix The path prefix to use for the files (e.g. 
"config/by-uuid" then everything inside it will be synced) + @param hashesOfFilesToToUpsert A map of the keys to their md5 hashes and UUIDs + @param trx The transaction to use for querying the DB for full configs + @param dryRun If true, only log what would be changed instead of modifying R2 + */ +async function syncWithR2( + s3Client: S3Client, + pathPrefix: string, + hashesOfFilesToToUpsert: Map, + trx: KnexReadonlyTransaction, + dryRun: boolean = false +) { + // We'll first get all the files in the R2 bucket under the path prefix + // and check if the hash of each file that exists in R2 matches the hash + // of the file we want to upsert. If it does, we'll remove it from the + // list of files to upsert; if it doesn't, it stays there to be re-uploaded. + // Files in R2 that are not in the upsert list are queued for deletion. + + const hashesOfFilesToDelete = new Map() + + // list the files in the R2 bucket. There may be more files in the + // bucket than can be returned in one list operation so loop until + // all files are listed + let continuationToken: string | undefined = undefined + do { + const listObjectsCommandInput = { + Bucket: GRAPHER_CONFIG_R2_BUCKET, + Prefix: pathPrefix, + ContinuationToken: continuationToken, + } + const listObjectsCommandOutput: ListObjectsV2CommandOutput = + await s3Client.send( + new ListObjectsV2Command(listObjectsCommandInput) + ) + if ((listObjectsCommandOutput.Contents?.length ?? 
0) > 0) { + listObjectsCommandOutput.Contents!.forEach((object) => { + if (object.Key && object.ETag) { + // For some reason the etag has quotes around it, strip those + const md5 = object.ETag.replace(/"/g, "") as HexString + const md5Base64 = bytesToBase64(hexToBytes(md5)) + + if (hashesOfFilesToToUpsert.has(object.Key)) { + if ( + hashesOfFilesToToUpsert.get(object.Key)?.fullMd5 === + md5Base64 + ) { + hashesOfFilesToToUpsert.delete(object.Key) + } + // If the existing full config in R2 is different then + // we just keep the hashesOfFilesToToUpsert entry around + // which will upsert the new full config later on + } else { + // if the file in R2 is not in the list of files to upsert + // then we should delete it + hashesOfFilesToDelete.set(object.Key, md5Base64) + } + } + }) + } + continuationToken = listObjectsCommandOutput.NextContinuationToken + } while (continuationToken) + + console.log("Number of files to upsert", hashesOfFilesToToUpsert.size) + console.log("Number of files to delete", hashesOfFilesToDelete.size) + + let progressBar = new ProgressBar( + "--- Deleting obsolete configs [:bar] :current/:total :elapseds\n", + { + total: hashesOfFilesToDelete.size, + } + ) + + // Delete the files in R2 that are no longer needed + for (const batch of chunk([...hashesOfFilesToDelete.entries()], 100)) { + const deletePromises = batch.map(async ([key, _]) => { + const deleteObjectCommandInput: DeleteObjectCommandInput = { + Bucket: GRAPHER_CONFIG_R2_BUCKET, + Key: key, + } + if (!dryRun) + await s3Client.send( + new DeleteObjectCommand(deleteObjectCommandInput) + ) + else console.log("Would have deleted", key) + progressBar.tick() + }) + await Promise.allSettled(deletePromises) + } + + console.log("Finished deletes") + + progressBar = new ProgressBar( + "--- Storing missing configs [:bar] :current/:total :elapseds\n", + { + total: hashesOfFilesToToUpsert.size, + } + ) + + const errors = [] + + // Chunk the inserts so that we don't need to keep all the full configs 
in memory + for (const batch of chunk([...hashesOfFilesToToUpsert.entries()], 100)) { + // Get the full configs for the batch + const fullConfigs = await knexRaw< + Pick + >(trx, `select id, full from chart_configs where id in (?)`, [ + batch.map((entry) => entry[1].id), + ]) + const fullConfigMap = new Map( + fullConfigs.map(({ id, full }) => [id, full]) + ) + + // Upload the full configs to R2 in parallel + const uploadPromises = batch.map(async ([key, val]) => { + const id = val.id + const fullMd5 = val.fullMd5 + const full = fullConfigMap.get(id) + if (full === undefined) { + return Promise.reject( + new Error(`Full config not found for id ${id}`) + ) + } + const putObjectCommandInput: PutObjectCommandInput = { + Bucket: GRAPHER_CONFIG_R2_BUCKET, + Key: key, + Body: full, + ContentMD5: fullMd5, + ContentType: "application/json", + } + if (!dryRun) + await s3Client.send(new PutObjectCommand(putObjectCommandInput)) + else console.log("Would have upserted", key) + progressBar.tick() + return + }) + const promiseResults = await Promise.allSettled(uploadPromises) + const batchErrors = promiseResults + .filter((result) => result.status === "rejected") + .map((result) => result.reason) + errors.push(...batchErrors) + } + + console.log("Finished upserts") + if (errors.length > 0) { + console.error(`${errors.length} Errors during upserts`) + for (const error of errors) { + console.error(error) + } + } +} + +async function main(parsedArgs: parseArgs.ParsedArgs, dryRun: boolean) { + if ( + GRAPHER_CONFIG_R2_BUCKET === undefined || + GRAPHER_CONFIG_R2_BUCKET_PATH === undefined + ) { + console.info("R2 bucket not configured, exiting") + return + } + + const s3Client = new S3Client({ + endpoint: R2_ENDPOINT, + forcePathStyle: false, + region: R2_REGION, + credentials: { + accessKeyId: R2_ACCESS_KEY_ID, + secretAccessKey: R2_SECRET_ACCESS_KEY, + }, + }) + + const hashesOfFilesToToUpsertBySlug = new Map() + const hashesOfFilesToToUpsertByUuid = new Map() + const 
pathPrefixBySlug = excludeUndefined([ + GRAPHER_CONFIG_R2_BUCKET_PATH, + R2GrapherConfigDirectory.publishedGrapherBySlug, + ]).join("/") + + const pathPrefixByUuid = excludeUndefined([ + GRAPHER_CONFIG_R2_BUCKET_PATH, + R2GrapherConfigDirectory.byUUID, + ]).join("/") + + await knexReadonlyTransaction(async (trx) => { + // Sync charts published by slug + const slugsAndHashesFromDb = await knexRaw< + Pick + >( + trx, + `select slug, fullMd5, id + from chart_configs + where slug is not null + and full ->> '$.isPublished' = "true"` + ) + + slugsAndHashesFromDb.forEach((row) => { + hashesOfFilesToToUpsertBySlug.set( + `${pathPrefixBySlug}/${row.slug}.json`, + { + fullMd5: row.fullMd5, + id: row.id, + } + ) + }) + + await syncWithR2( + s3Client, + pathPrefixBySlug, + hashesOfFilesToToUpsertBySlug, + trx, + dryRun + ) + + // Sync charts by UUID + const slugsAndHashesFromDbByUuid = await knexRaw< + Pick + >(trx, `select fullMd5, id from chart_configs`) + + slugsAndHashesFromDbByUuid.forEach((row) => { + hashesOfFilesToToUpsertByUuid.set( + `${pathPrefixByUuid}/${row.id}.json`, + { + fullMd5: row.fullMd5, + id: row.id, + } + ) + }) + + await syncWithR2( + s3Client, + pathPrefixByUuid, + hashesOfFilesToToUpsertByUuid, + trx, + dryRun + ) + }) +} + +const parsedArgs = parseArgs(process.argv.slice(2)) +if (parsedArgs["h"]) { + console.log( + `syncGraphersToR2.js - sync grapher configs from the chart_configs table to R2 + +--dry-run: Don't make any actual changes to R2` + ) +} else { + main(parsedArgs, parsedArgs["dry-run"]) +} diff --git a/devTools/syncGraphersToR2/tsconfig.json b/devTools/syncGraphersToR2/tsconfig.json new file mode 100644 index 00000000000..74f2eaadbb6 --- /dev/null +++ b/devTools/syncGraphersToR2/tsconfig.json @@ -0,0 +1,18 @@ +{ + "extends": "../tsconfigs/tsconfig.base.json", + "compilerOptions": { + "outDir": "../../itsJustJavascript/devTools/syncGrapherToR2", + "rootDir": "." 
+ }, + "references": [ + { + "path": "../../db" + }, + { + "path": "../../adminSiteServer" + }, + { + "path": "../../settings" + } + ] +} diff --git a/package.json b/package.json index 8ab030f444c..20f8e503d94 100644 --- a/package.json +++ b/package.json @@ -39,7 +39,8 @@ "testPrettierAll": "yarn prettier --check \"**/*.{tsx,ts,jsx,js,json,md,html,css,scss,yml}\"", "testJest": "lerna run buildTests && jest", "testSiteNavigation": "tsx --tsconfig tsconfig.tsx.json devTools/navigationTest/navigationTest.ts", - "generateDbTypes": "npx @rmp135/sql-ts -c db/sql-ts/sql-ts-config.json" + "generateDbTypes": "npx @rmp135/sql-ts -c db/sql-ts/sql-ts-config.json", + "syncGraphersToR2": "tsx --tsconfig tsconfig.tsx.json devTools/syncGraphersToR2/syncGraphersToR2.ts" }, "dependencies": { "@algolia/autocomplete-js": "^1.17.2", diff --git a/packages/@ourworldindata/types/src/NominalType.ts b/packages/@ourworldindata/types/src/NominalType.ts index f3487f54232..f24497dfb29 100644 --- a/packages/@ourworldindata/types/src/NominalType.ts +++ b/packages/@ourworldindata/types/src/NominalType.ts @@ -20,3 +20,11 @@ declare const __nominal__type: unique symbol export type Nominal = Type & { readonly [__nominal__type]: Identifier } + +export function wrap(obj: T): Nominal { + return obj as Nominal +} + +export function unwrap(obj: Nominal): T { + return obj +} diff --git a/packages/@ourworldindata/types/src/dbTypes/ChartConfigs.ts b/packages/@ourworldindata/types/src/dbTypes/ChartConfigs.ts index 24c98ee6b6f..b1db3c1e82b 100644 --- a/packages/@ourworldindata/types/src/dbTypes/ChartConfigs.ts +++ b/packages/@ourworldindata/types/src/dbTypes/ChartConfigs.ts @@ -6,6 +6,7 @@ export interface DbInsertChartConfig { id: string patch: JsonString full: JsonString + fullMd5?: string slug?: string | null createdAt?: Date updatedAt?: Date | null diff --git a/packages/@ourworldindata/types/src/index.ts b/packages/@ourworldindata/types/src/index.ts index 3ca08233205..3c8e8af77d0 100644 --- 
a/packages/@ourworldindata/types/src/index.ts +++ b/packages/@ourworldindata/types/src/index.ts @@ -646,7 +646,7 @@ export { export { RedirectCode, type DbPlainRedirect } from "./dbTypes/Redirects.js" -export type { Nominal } from "./NominalType.js" +export { type Nominal, wrap, unwrap } from "./NominalType.js" export { type DbRawLatestWork, diff --git a/packages/@ourworldindata/utils/src/Util.test.ts b/packages/@ourworldindata/utils/src/Util.test.ts index c1cd463fac5..a10e4c9efd0 100755 --- a/packages/@ourworldindata/utils/src/Util.test.ts +++ b/packages/@ourworldindata/utils/src/Util.test.ts @@ -29,12 +29,17 @@ import { traverseEnrichedBlock, cartesian, formatInlineList, + base64ToBytes, + bytesToBase64, + hexToBytes, + bytesToHex, } from "./Util.js" import { BlockImageSize, OwidEnrichedGdocBlock, SortOrder, } from "@ourworldindata/types" +import { webcrypto as crypto } from "node:crypto" describe(findClosestTime, () => { describe("without tolerance", () => { @@ -795,3 +800,24 @@ describe(formatInlineList, () => { ) }) }) + +function generateRandomBytes(length: number): Uint8Array { + const bytes = new Uint8Array(length) + crypto.getRandomValues(bytes) + return bytes +} + +describe("hex/base64 conversion is reversible", () => { + const originalBytes = generateRandomBytes(33) + const base64String = bytesToBase64(originalBytes) + const roundTrippedBytes = base64ToBytes(base64String) + it("is the same after converting to base64 and back", () => { + expect(originalBytes).toEqual(roundTrippedBytes) + }) + + const hexString = bytesToHex(originalBytes) + const roundTrippedBytesHex = hexToBytes(hexString) + it("is the same after converting to hex and back", () => { + expect(originalBytes).toEqual(roundTrippedBytesHex) + }) +}) diff --git a/packages/@ourworldindata/utils/src/Util.ts b/packages/@ourworldindata/utils/src/Util.ts index 93f0aa0289e..58c753efa91 100644 --- a/packages/@ourworldindata/utils/src/Util.ts +++ b/packages/@ourworldindata/utils/src/Util.ts @@ -174,10 
+174,12 @@ import { TagGraphRoot, TagGraphRootName, TagGraphNode, + Nominal, } from "@ourworldindata/types" import { PointVector } from "./PointVector.js" import React from "react" import { match, P } from "ts-pattern" +// import "crypto" export type NoUndefinedValues = { [P in keyof T]: Required> @@ -454,6 +456,42 @@ export const cagr = ( ) } +export type Base64String = Nominal +export type HexString = Nominal + +export function base64ToBytes(base64: Base64String): Uint8Array { + const binString = atob(base64) + return Uint8Array.from(binString, (m) => { + const cp = m.codePointAt(0) + if (cp === undefined) throw new Error("Invalid base64") + return cp + }) +} + +export function bytesToBase64(bytes: Uint8Array): Base64String { + const binString = Array.from(bytes, (byte) => + String.fromCodePoint(byte) + ).join("") + return btoa(binString) as Base64String +} + +export function hexToBytes(hex: string): Uint8Array { + if (hex.length % 2 !== 0) throw new Error("Invalid hex") + const bytes = new Uint8Array(hex.length / 2) + for (let i = 0; i < hex.length; i += 2) { + const parsed = parseInt(hex.slice(i, i + 2), 16) + if (isNaN(parsed)) throw new Error("Invalid hex") + bytes[i / 2] = parsed + } + return bytes +} + +export function bytesToHex(bytes: Uint8Array): HexString { + return Array.from(bytes) + .map((byte) => byte.toString(16).padStart(2, "0")) + .join("") as HexString +} + export const makeAnnotationsSlug = (columnSlug: string): string => `${columnSlug}-annotations` diff --git a/packages/@ourworldindata/utils/src/index.ts b/packages/@ourworldindata/utils/src/index.ts index bad8f5efa17..c94ef244060 100644 --- a/packages/@ourworldindata/utils/src/index.ts +++ b/packages/@ourworldindata/utils/src/index.ts @@ -20,6 +20,12 @@ export { firstOfNonEmptyArray, lastOfNonEmptyArray, mapToObjectLiteral, + type Base64String, + type HexString, + bytesToBase64, + base64ToBytes, + bytesToHex, + hexToBytes, next, previous, domainExtent, diff --git a/settings/serverSettings.ts 
b/settings/serverSettings.ts index c6f3c42cbf2..d945b49173c 100644 --- a/settings/serverSettings.ts +++ b/settings/serverSettings.ts @@ -154,22 +154,29 @@ export const IMAGE_HOSTING_R2_BUCKET_SUBFOLDER_PATH: string = IMAGE_HOSTING_R2_BUCKET_PATH.indexOf("/") + 1 ) // extract R2 credentials from rclone config as defaults -export const IMAGE_HOSTING_R2_ENDPOINT: string = - serverSettings.IMAGE_HOSTING_R2_ENDPOINT || +export const R2_ENDPOINT: string = + serverSettings.R2_ENDPOINT || rcloneConfig["owid-r2"]?.endpoint || "https://078fcdfed9955087315dd86792e71a7e.r2.cloudflarestorage.com" -export const IMAGE_HOSTING_R2_ACCESS_KEY_ID: string = - serverSettings.IMAGE_HOSTING_R2_ACCESS_KEY_ID || +export const R2_ACCESS_KEY_ID: string = + serverSettings.R2_ACCESS_KEY_ID || rcloneConfig["owid-r2"]?.access_key_id || "" -export const IMAGE_HOSTING_R2_SECRET_ACCESS_KEY: string = - serverSettings.IMAGE_HOSTING_R2_SECRET_ACCESS_KEY || +export const R2_SECRET_ACCESS_KEY: string = + serverSettings.R2_SECRET_ACCESS_KEY || rcloneConfig["owid-r2"]?.secret_access_key || "" -export const IMAGE_HOSTING_R2_REGION: string = - serverSettings.IMAGE_HOSTING_R2_REGION || - rcloneConfig["owid-r2"]?.region || - "auto" +export const R2_REGION: string = + serverSettings.R2_REGION || rcloneConfig["owid-r2"]?.region || "auto" + +export const GRAPHER_CONFIG_BASE_URL: string = + serverSettings.GRAPHER_CONFIG_BASE_URL || + "https://ourworldindata.org/grapher/" + +export const GRAPHER_CONFIG_R2_BUCKET: string | undefined = + serverSettings.GRAPHER_CONFIG_R2_BUCKET +export const GRAPHER_CONFIG_R2_BUCKET_PATH: string | undefined = + serverSettings.GRAPHER_CONFIG_R2_BUCKET_PATH export const DATA_API_URL: string = clientSettings.DATA_API_URL diff --git a/site/gdocs/components/Image.tsx b/site/gdocs/components/Image.tsx index 57e58bbb967..3aceb05b773 100644 --- a/site/gdocs/components/Image.tsx +++ b/site/gdocs/components/Image.tsx @@ -115,9 +115,7 @@ export default function Image(props: { if (isPreviewing) 
{ const makePreviewUrl = (f: string) => - `${IMAGE_HOSTING_R2_CDN_URL}/${IMAGE_HOSTING_R2_BUCKET_SUBFOLDER_PATH}/${encodeURIComponent( - f - )}` + `${IMAGE_HOSTING_R2_CDN_URL}/${IMAGE_HOSTING_R2_BUCKET_SUBFOLDER_PATH}/${encodeURIComponent(f)}` const PreviewSource = (props: { i?: ImageMetadata; sm?: boolean }) => { const { i, sm } = props diff --git a/tsconfig.json b/tsconfig.json index 94bbeed9aae..ae863d527a2 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -54,6 +54,9 @@ }, { "path": "./devTools/navigationTest" + }, + { + "path": "./devTools/syncGraphersToR2" } ] }