From 1ee5eefe97a1a7088a68e8abad508b12004c0e01 Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Tue, 24 Oct 2023 10:39:56 +0200 Subject: [PATCH] data-page-revamp-citations --- baker/GrapherBaker.tsx | 43 ++++- datapage/Datapage.ts | 6 +- db/migrateWpPostsToArchieMl.ts | 6 +- db/model/Post.ts | 10 +- .../grapher/src/core/Grapher.tsx | 3 +- packages/@ourworldindata/utils/src/Util.ts | 29 +++- packages/@ourworldindata/utils/src/index.ts | 3 + .../@ourworldindata/utils/src/owidTypes.ts | 8 +- site/DataPageV2Content.tsx | 163 +++++++----------- site/gdocs/OwidGdoc.tsx | 15 +- site/gdocs/utils.tsx | 13 +- 11 files changed, 179 insertions(+), 120 deletions(-) diff --git a/baker/GrapherBaker.tsx b/baker/GrapherBaker.tsx index 8b5e74794f4..080bac29356 100644 --- a/baker/GrapherBaker.tsx +++ b/baker/GrapherBaker.tsx @@ -24,6 +24,8 @@ import { FaqEntryData, FaqDictionary, partition, + PrimaryTopic, + sortBy, } from "@ourworldindata/utils" import { getRelatedArticles, @@ -45,7 +47,7 @@ import { import * as db from "../db/db.js" import { glob } from "glob" import { isPathRedirectedToExplorer } from "../explorerAdminServer/ExplorerRedirects.js" -import { getPostBySlug } from "../db/model/Post.js" +import { bySlug, getPostBySlug, parsePostAuthors } from "../db/model/Post.js" import { GrapherInterface } from "@ourworldindata/grapher" import workerpool from "workerpool" import ProgressBar from "progress" @@ -60,11 +62,14 @@ import { getDatapageJson, parseGdocContentFromAllowedLevelOneHeadings, } from "../datapage/Datapage.js" +import { slugify_topic } from "../site/DataPageV2Content.js" import { ExplorerProgram } from "../explorer/ExplorerProgram.js" import { Image } from "../db/model/Image.js" import { logErrorAndMaybeSendToBugsnag } from "../serverUtils/errorLog.js" import { parseFaqs } from "../db/model/Gdoc/rawToEnriched.js" +import { Gdoc } from "../db/model/Gdoc/Gdoc.js" +import { getShortPageCitation } from "../site/gdocs/utils.js" /** * @@ -322,6 +327,42 @@ export async 
function renderDataPageV2({ grapherConfigForVariable ?? {} ) + const firstTopicTag = datapageData.topicTagsLinks?.[0] + + if (firstTopicTag) { + const gdoc = await Gdoc.findOne({ + where: { + slug: slugify_topic(firstTopicTag), + }, + relations: ["tags"], + }) + if (gdoc) { + const citation = getShortPageCitation( + gdoc.content.authors, + gdoc.content.title ?? "", + gdoc?.publishedAt + ) + datapageData.primaryTopic = { + topicTag: firstTopicTag, + citation, + } + } else { + const post = await bySlug(slugify_topic(firstTopicTag)) + if (post) { + const authors = parsePostAuthors(post.authors) + const citation = getShortPageCitation( + authors, + post.title, + post.published_at + ) + datapageData.primaryTopic = { + topicTag: firstTopicTag, + citation, + } + } + } + } + // Get the charts this variable is being used in (aka "related charts") // and exclude the current chart to avoid duplicates datapageData.allCharts = await getRelatedChartsForVariable( diff --git a/datapage/Datapage.ts b/datapage/Datapage.ts index 173383d7b5b..1a9dbbe3977 100644 --- a/datapage/Datapage.ts +++ b/datapage/Datapage.ts @@ -16,9 +16,9 @@ import { DataPageDataV2, OwidVariableWithSource, dayjs, - getAttributionFromVariable, gdocIdRegex, getETLPathComponents, + getAttributionFragmentsFromVariable, } from "@ourworldindata/utils" import { ExplorerProgram } from "../explorer/ExplorerProgram.js" import { Gdoc } from "../db/model/Gdoc/Gdoc.js" @@ -29,7 +29,7 @@ export const getDatapageDataV2 = async ( partialGrapherConfig: GrapherInterface ): Promise => { { - const processingLevel = variableMetadata.processingLevel ?? "major" + const processingLevel = variableMetadata.processingLevel ?? "minor" const version = getETLPathComponents(variableMetadata.catalogPath ?? "")?.version ?? 
"" @@ -59,7 +59,7 @@ export const getDatapageDataV2 = async ( attributionShort: variableMetadata.presentation?.attributionShort, titleVariant: variableMetadata.presentation?.titleVariant, topicTagsLinks: variableMetadata.presentation?.topicTagsLinks ?? [], - attribution: getAttributionFromVariable(variableMetadata), + attributions: getAttributionFragmentsFromVariable(variableMetadata), faqs: [], descriptionKey: variableMetadata.descriptionKey ?? [], descriptionProcessing: variableMetadata.descriptionProcessing, diff --git a/db/migrateWpPostsToArchieMl.ts b/db/migrateWpPostsToArchieMl.ts index 1269b944d96..481a4a36d1b 100644 --- a/db/migrateWpPostsToArchieMl.ts +++ b/db/migrateWpPostsToArchieMl.ts @@ -4,7 +4,6 @@ import cheerio from "cheerio" import { OwidGdocPublicationContext, OwidGdocInterface, - sortBy, OwidArticleBackportingStatistics, OwidGdocType, RelatedChart, @@ -19,6 +18,7 @@ import { adjustHeadingLevels, } from "./model/Gdoc/htmlToEnriched.js" import { getRelatedCharts } from "./wpdb.js" +import { parsePostAuthors } from "./model/Post.js" // slugs from all the linear entries we want to migrate from @edomt const entries = new Set([ @@ -162,9 +162,7 @@ const migrate = async (): Promise => { title: post.title, subtitle: post.excerpt, excerpt: post.excerpt, - authors: sortBy(authors, ["order"]).map( - (author) => author.author - ), + authors: parsePostAuthors(post.authors), dateline: dateline, // TODO: this discards block level elements - those might be needed? 
refs: undefined, diff --git a/db/model/Post.ts b/db/model/Post.ts index fbe0d958433..eebee925001 100644 --- a/db/model/Post.ts +++ b/db/model/Post.ts @@ -1,6 +1,6 @@ import * as db from "../db.js" import { Knex } from "knex" -import { PostRow } from "@ourworldindata/utils" +import { PostRow, sortBy } from "@ourworldindata/utils" export const postsTable = "posts" @@ -49,6 +49,14 @@ export const setTags = async ( export const bySlug = async (slug: string): Promise => (await db.knexTable("posts").where({ slug: slug }))[0] +/** The authors field in the posts table is a JSON column that contains an array of + { order: 1, author: "Max Mustermann" } like records. This function parses the + string and returns a simple string array of author names in the correct order */ +export const parsePostAuthors = (authorsJson: string): string[] => { + const authors = JSON.parse(authorsJson) + return sortBy(authors, ["order"]).map((author) => author.author) +} + export const setTagsForPost = async ( postId: number, tagIds: number[] diff --git a/packages/@ourworldindata/grapher/src/core/Grapher.tsx b/packages/@ourworldindata/grapher/src/core/Grapher.tsx index 12855174dd6..58c1433a245 100644 --- a/packages/@ourworldindata/grapher/src/core/Grapher.tsx +++ b/packages/@ourworldindata/grapher/src/core/Grapher.tsx @@ -1535,7 +1535,8 @@ export class Grapher const uniqueAttributions = uniq(compact(attributions)) - if (uniqueAttributions.length > 3) return "Multiple sources" + if (uniqueAttributions.length > 3) + return `${attributions[0]} and other sources` return uniqueAttributions.join("; ") } diff --git a/packages/@ourworldindata/utils/src/Util.ts b/packages/@ourworldindata/utils/src/Util.ts index a6e596534a4..585f53fbe73 100644 --- a/packages/@ourworldindata/utils/src/Util.ts +++ b/packages/@ourworldindata/utils/src/Util.ts @@ -1743,16 +1743,22 @@ export function getOriginAttributionFragments( export function getAttributionFromVariable( variable: OwidVariableWithSource ): string { +
return getAttributionFragmentsFromVariable(variable).join("; ") +} + +export function getAttributionFragmentsFromVariable( + variable: OwidVariableWithSource +): string[] { if ( variable.presentation?.attribution && variable.presentation?.attribution !== "" ) - return variable.presentation?.attribution + return [variable.presentation?.attribution] const originAttributionFragments = getOriginAttributionFragments( variable.origins ) const sourceName = variable.source?.name - return uniq(compact([sourceName, ...originAttributionFragments])).join("; ") + return uniq(compact([sourceName, ...originAttributionFragments])) } interface ETLPathComponents { @@ -1769,3 +1775,22 @@ export const getETLPathComponents = (path: string): ETLPathComponents => { path.split("/") return { channel, producer, version, dataset, table, indicator } } + +export const formatAuthors = ({ + authors, + requireMax, + forBibtex, +}: { + authors: string[] + requireMax?: boolean + forBibtex?: boolean +}): string => { + if (requireMax && !authors.includes("Max Roser")) + authors = [...authors, "Max Roser"] + + let authorsText = authors.slice(0, -1).join(forBibtex ? 
" and " : ", ") + if (authorsText.length === 0) authorsText = authors[0] + else authorsText += ` and ${last(authors)}` + + return authorsText +} diff --git a/packages/@ourworldindata/utils/src/index.ts b/packages/@ourworldindata/utils/src/index.ts index 97a91a7cbf5..5cdf2bd106e 100644 --- a/packages/@ourworldindata/utils/src/index.ts +++ b/packages/@ourworldindata/utils/src/index.ts @@ -215,6 +215,7 @@ export { type DataPageDataV2, type DataPageRelatedData, type DataPageRelatedResearch, + type PrimaryTopic, type FaqLink, type FaqDictionary, type RawBlockResearchAndWritingRow, @@ -340,8 +341,10 @@ export { mergePartialGrapherConfigs, getOriginAttributionFragments, getAttributionFromVariable, + getAttributionFragmentsFromVariable, copyToClipboard, getETLPathComponents, + formatAuthors, } from "./Util.js" export { diff --git a/packages/@ourworldindata/utils/src/owidTypes.ts b/packages/@ourworldindata/utils/src/owidTypes.ts index 79209f4e81f..3079ed75ddc 100644 --- a/packages/@ourworldindata/utils/src/owidTypes.ts +++ b/packages/@ourworldindata/utils/src/owidTypes.ts @@ -1464,13 +1464,19 @@ export interface FaqLink { fragmentId: string } +export interface PrimaryTopic { + topicTag: string + citation: string +} + export interface DataPageDataV2 { status: "published" | "draft" title: string titleVariant?: string attributionShort?: string topicTagsLinks?: string[] - attribution: string + primaryTopic?: PrimaryTopic + attributions: string[] descriptionShort?: string descriptionFromProducer?: string faqs: FaqLink[] // Todo: resolve these at this level to the point where we can preview them diff --git a/site/DataPageV2Content.tsx b/site/DataPageV2Content.tsx index 6bb2df78038..82b6fe137b2 100644 --- a/site/DataPageV2Content.tsx +++ b/site/DataPageV2Content.tsx @@ -5,7 +5,6 @@ import { Grapher, GrapherInterface } from "@ourworldindata/grapher" import { ExpandableToggle } from "./ExpandableToggle.js" import ReactDOM from "react-dom" import { GrapherWithFallback } from 
"./GrapherWithFallback.js" -import { formatAuthors } from "./clientFormatting.js" import { ArticleBlocks } from "./gdocs/ArticleBlocks.js" import { RelatedCharts } from "./blocks/RelatedCharts.js" import { @@ -16,7 +15,8 @@ import { EnrichedBlockList, uniq, pick, - capitalize, + OwidOrigin, + formatAuthors, } from "@ourworldindata/utils" import { markdownToEnrichedTextBlock } from "@ourworldindata/components" import { AttachmentsContext, DocumentContext } from "./gdocs/OwidGdoc.js" @@ -76,19 +76,26 @@ const getDateRange = (dateRange: string): string | null => { return null } -const slugify_topic = (topic: string) => { +export const slugify_topic = (topic: string) => { // This is a heuristic to map from free form tag texts to topic page URLs. We'll // have to switch to explicitly stored URLs or explicit links between tags and topic pages // soon but for the time being this makes sure that "CO2 & Greenhouse Gas Emissions" can be automatically // linked to /co2-and-greenhouse-gas-emissions // Note that the heuristic fails for a few cases like "HIV/AIDS" or "Mpox (Monkeypox)" - const replaced = topic - .replace("&", "-and-") - .replace("'", "") - .replace("+", "") + const replaced = topic.replace("&", "and").replace("'", "").replace("+", "") return slugify(replaced) } +type OriginSubset = Pick< + OwidOrigin, + | "producer" + | "descriptionSnapshot" + | "dateAccessed" + | "urlMain" + | "description" + | "citationFull" +> + export const DataPageV2Content = ({ datapageData, grapherConfig, @@ -148,7 +155,7 @@ export const DataPageV2Content = ({ // TODO: this is missing the attribution field ATM and // so assembles something only roughly similar to the citation described // by Joe. Also, we need the dataset title. 
- const origins = uniq( + const origins: OriginSubset[] = uniq( datapageData.origins.map((item) => pick(item, [ "producer", @@ -160,20 +167,26 @@ export const DataPageV2Content = ({ ]) ) ) - const producers = uniq(datapageData.origins.map((o) => o.producer)).join( - "; " - ) - const attributionLong = datapageData.attribution ?? producers + const producers = uniq(datapageData.origins.map((o) => o.producer)) + + const attributionFragments = datapageData.attributions ?? producers + const attributionPotentiallyShortened = + attributionFragments.length > 3 + ? `${attributionFragments[0]} and other sources` + : attributionFragments.join(", ") // const attributionShort = // datapageData.attributionShort ?? // uniq( // datapageData.origins.map((o) => o.attributionShort ?? o.producer) // ).join("; ") const processedAdapted = - datapageData.owidProcessingLevel === "minor" ? `minor` : `major` + datapageData.owidProcessingLevel === "minor" + ? `minor processing` + : `major adaptations` const lastUpdated = dayjs(datapageData.lastUpdated, ["YYYY", "YYYY-MM-DD"]) const yearOfUpdate = lastUpdated.year() - const citationShort = `${attributionLong} — with ${processedAdapted} processing by Our World In Data (${yearOfUpdate})` + const citationShort = `${attributionPotentiallyShortened} – with ${processedAdapted} by Our World In Data (${yearOfUpdate})` + const citationLonger = `${attributionPotentiallyShortened} – with ${processedAdapted} by Our World In Data (${yearOfUpdate})` const originsLong = uniq( datapageData.origins.map( (o) => `${o.producer}, ${o.title ?? o.titleSnapshot}` @@ -181,11 +194,7 @@ export const DataPageV2Content = ({ ).join("; ") const today = dayjs().format("MMMM D, YYYY") const currentYear = dayjs().year() - const citationLong = `${citationShort}. ${ - datapageData.title - }. ${originsLong}. ${capitalize( - processedAdapted - )} processing by Our World In Data. Retrieved ${today} from ${canonicalUrl}` + const citationLong = `${citationLonger}. 
${datapageData.title}. ${originsLong}. Retrieved ${today} from ${canonicalUrl}` const { linkedDocuments = {}, @@ -223,9 +232,31 @@ export const DataPageV2Content = ({ /> ) : null + const citationFullBlockFn = (source: OriginSubset) => { + source.citationFull && ( +
+
Citation
+ This is the citation of the original data obtained from the + source, prior to any processing or adaptation by Our World in + Data. To cite data downloaded from this page, please use the + suggested citation given in{" "} + Reuse This Work below. + +
+ ) + } + const dateRange = getDateRange(datapageData.dateRange) - const citationDatapage = `Our World In Data (${currentYear}). Data Page: ${datapageData.title} – ${producers}. Retrieved from ${canonicalUrl} [online resource]` + const citationDatapage = datapageData.primaryTopic + ? `“Data Page: ${datapageData.title}”, part of the following publication: ${datapageData.primaryTopic.citation}. Data adapted from ${producers}. Retrieved from ${canonicalUrl} [online resource]` + : `“Data Page: ${datapageData.title}”. Our World in Data (${currentYear}). Data adapted from ${producers}. Retrieved from ${canonicalUrl} [online resource]` + return ( Source -
{datapageData.attribution}
- {datapageData.owidProcessingLevel && ( -
- with{" "} - - {processedAdapted} - {" processing"} - {" "} - by Our World In Data -
- )} +
+ {datapageData.attributions} – with{" "} + + {processedAdapted} + {" "} + by Our World In Data +
@@ -660,71 +687,8 @@ export const DataPageV2Content = ({
)} - {source.citationFull && ( -
-
- Citation -
- This - is - the - citation - of - the - original - data - obtained - from - the - source, - prior - to - any - processing - or - adaptation - by - Our - World - in - Data. - To - cite - data - downloaded - from - this - page, - please - use - the - suggested - citation - given - in{" "} - - Reuse - This - Work - {" "} - below. - -
+ {citationFullBlockFn( + source )} )} @@ -909,7 +873,8 @@ export const DataPageV2Content = ({

To cite this page overall, including any - descriptions of the data + descriptions, FAQs or + explanations of the data authored by Our World in Data, please use the following citation: diff --git a/site/gdocs/OwidGdoc.tsx b/site/gdocs/OwidGdoc.tsx index a89a47472f4..990e734e17a 100644 --- a/site/gdocs/OwidGdoc.tsx +++ b/site/gdocs/OwidGdoc.tsx @@ -13,14 +13,14 @@ import { LICENSE_ID, isEmpty, OwidGdocType, + formatAuthors, } from "@ourworldindata/utils" import { CodeSnippet } from "../blocks/CodeSnippet.js" import { BAKED_BASE_URL } from "../../settings/clientSettings.js" -import { formatAuthors } from "../clientFormatting.js" import { DebugProvider } from "./DebugContext.js" import { OwidGdocHeader } from "./OwidGdocHeader.js" import StickyNav from "../blocks/StickyNav.js" - +import { getShortPageCitation } from "./utils.js" export const AttachmentsContext = createContext<{ linkedCharts: Record linkedDocuments: Record @@ -64,11 +64,12 @@ export function OwidGdoc({ }: OwidGdocProps) { const citationDescription = citationDescriptionsByArticleType[content.type ?? OwidGdocType.Article] - const citationText = `${formatAuthors({ - authors: content.authors, - })} (${publishedAt?.getFullYear()}) - "${ - content.title - }". Published online at OurWorldInData.org. Retrieved from: '${`${BAKED_BASE_URL}/${slug}`}' [Online Resource]` + const shortPageCitation = getShortPageCitation( + content.authors, + content.title ?? "", + publishedAt + ) + const citationText = `${shortPageCitation} Published online at OurWorldInData.org. 
Retrieved from: '${`${BAKED_BASE_URL}/${slug}`}' [Online Resource]` const bibtex = `@article{owid-${slug.replace(/\//g, "-")}, author = {${formatAuthors({ diff --git a/site/gdocs/utils.tsx b/site/gdocs/utils.tsx index 54494923019..124a81d309a 100644 --- a/site/gdocs/utils.tsx +++ b/site/gdocs/utils.tsx @@ -7,8 +7,9 @@ import { OwidGdocInterface, ImageMetadata, LinkedChart, - OwidGdocContent, Url, + OwidGdocContent, + formatAuthors, } from "@ourworldindata/utils" import { match } from "ts-pattern" import { AttachmentsContext } from "./OwidGdoc.js" @@ -198,3 +199,13 @@ export function renderSpan( export function renderSpans(spans: Span[]): JSX.Element[] { return spans.map(renderSpan) } + +export function getShortPageCitation( + authors: string[], + title: string, + publishedAt: Date | null +) { + return `${formatAuthors({ + authors: authors, + })} (${publishedAt?.getFullYear()}) - “${title}”` +}