From 23adc24fad6b1ad4ad1f71dfa89d1874aaf9ca97 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Mon, 8 Apr 2024 15:47:15 +0100 Subject: [PATCH] wip: fix up citations --- packages/citation-js-utils/src/index.ts | 53 ++++++++++------ .../types/citation-js/index.d.ts | 8 +-- packages/myst-cli/src/process/citations.ts | 6 +- packages/myst-cli/src/process/file.ts | 4 +- packages/myst-cli/src/transforms/dois.ts | 60 ++++++++++++------- 5 files changed, 83 insertions(+), 48 deletions(-) diff --git a/packages/citation-js-utils/src/index.ts b/packages/citation-js-utils/src/index.ts index 40448b217..cd2d4f06c 100644 --- a/packages/citation-js-utils/src/index.ts +++ b/packages/citation-js-utils/src/index.ts @@ -1,4 +1,4 @@ -import type { OutputOptions } from '@citation-js/core'; +import type { OutputOptions, CSL } from '@citation-js/core'; import { Cite } from '@citation-js/core'; import sanitizeHtml from 'sanitize-html'; @@ -11,7 +11,7 @@ const DOI_IN_TEXT = /(10.\d{4,9}\/[-._;()/:A-Z0-9]*[A-Z0-9])/i; export type CitationJson = { type?: 'article-journal' | string; id: string; - author?: { given: string; family: string }[]; + author?: { given: string; family: string, literal?: string }[]; issued?: { 'date-parts'?: number[][]; literal?: string }; publisher?: string; title?: string; @@ -47,14 +47,6 @@ function cleanRef(citation: string) { return cleanHtml.replace(/^1\./g, '').replace(/&/g, '&').trim(); } -// eslint-disable-next-line @typescript-eslint/no-unused-vars -const defaultOpts: OutputOptions = { - format: 'string', - type: 'json', - style: 'ris', - lang: 'en-US', -}; - export enum CitationJSStyles { 'apa' = 'citation-apa', 'vancouver' = 'citation-vancouver', @@ -66,8 +58,7 @@ export enum InlineCite { 't' = 't', } -const defaultString: OutputOptions = { - format: 'string', +const defaultOptions: Record = { lang: 'en-US', type: 'html', style: CitationJSStyles.apa, @@ -112,7 +103,7 @@ export function getInlineCitation(data: CitationJson, kind: InlineCite, opts?: I } if (authors.length > 2) { return [ - { type: 'text', value: `${prefix}${authors[0].family} ` }, + { type: 'text', value: `${prefix}${authors[0].family ?? authors[0].literal} ` }, { type: 'emphasis', children: [{ type: 'text', value: 'et al.' }] }, { type: 'text', value: `${yearPart}` }, ]; @@ -171,12 +162,19 @@ export function firstNonDoiUrl(str?: string, doi?: string) { return matches.map((match) => match[0]).find((match) => !doi || !match.includes(doi)); } -export async function getCitations(bibtex: string): Promise { - const cite = new Cite(); - const p = await Cite.async(bibtex); +function parseCitationStyle(style: string): string { + const [styleType, styleFormat] = style.split('-'); + if (styleType !== 'citation') { + throw new Error(`unexpected citation style: ${style}`); + } + return styleFormat; +} +export async function getCitationRenderers(data: any): Promise { + const cite = new Cite(); + const p = await Cite.async(data); return Object.fromEntries( - p.data.map((c: any): [string, CitationRenderer[0]] => { + p.data.map((c: CSL): [string, CitationRenderer[0]] => { const matchDoi = c.URL?.match(DOI_IN_TEXT) ?? c.note?.match(DOI_IN_TEXT); if (!c.DOI && matchDoi) { c.DOI = matchDoi[0]; @@ -189,7 +187,13 @@ export async function getCitations(bibtex: string): Promise { }, render(style?: CitationJSStyles) { return replaceUrlsWithAnchorElement( - cleanRef(cite.set(c).get({ ...defaultString, style: style ?? CitationJSStyles.apa })), + cleanRef( + cite.set(c).format('bibliography', { + template: parseCitationStyle(style ?? (CitationJSStyles.apa as string)), + format: 'html', + lang: 'en-US', + }) as string, + ), c.DOI, ); }, @@ -197,7 +201,18 @@ export async function getCitations(bibtex: string): Promise { return c.DOI || undefined; }, getURL(): string | undefined { - return firstNonDoiUrl(cleanRef(cite.set(c).get(defaultString)), c.DOI) ?? doiUrl(c.DOI); + return ( + firstNonDoiUrl( + cleanRef( + cite.set(c).format('bibliography', { + template: parseCitationStyle(CitationJSStyles.apa as string), + format: 'html', + lang: 'en-US', + }) as string, + ), + c.DOI, + ) ?? doiUrl(c.DOI) + ); }, cite: c, }, diff --git a/packages/citation-js-utils/types/citation-js/index.d.ts b/packages/citation-js-utils/types/citation-js/index.d.ts index b7233b313..f78ec82f6 100644 --- a/packages/citation-js-utils/types/citation-js/index.d.ts +++ b/packages/citation-js-utils/types/citation-js/index.d.ts @@ -38,13 +38,13 @@ declare module '@citation-js/core' { } & Record; export class Cite { - constructor(input?: string | CSL); + constructor(input?: any); - static async(data: string | Cite): Promise; + static async(data: any): Promise; - set(data: string | Cite): this; + set(data: any): this; - get: (opts: OutputOptions) => string; + format: (format: string, options: Any) => string | object[]; data: CSL[]; } diff --git a/packages/myst-cli/src/process/citations.ts b/packages/myst-cli/src/process/citations.ts index 824a5e009..3658b82f1 100644 --- a/packages/myst-cli/src/process/citations.ts +++ b/packages/myst-cli/src/process/citations.ts @@ -1,6 +1,6 @@ import fs from 'node:fs'; import type { CitationRenderer } from 'citation-js-utils'; -import { getCitations } from 'citation-js-utils'; +import { getCitationRenderers } from 'citation-js-utils'; import { tic, isUrl } from 'myst-cli-utils'; import { RuleId, plural } from 'myst-common'; import type { ISession, ISessionWithCache } from '../session/types.js'; @@ -8,7 +8,7 @@ import { castSession } from '../session/cache.js'; import { selectors } from '../store/index.js'; import { addWarningForFile } from '../utils/addWarningForFile.js'; -export async function loadCitations(session: ISession, path: string): Promise { +export async function loadBibTeXCitationRenderers(session: ISession, path: string): Promise { const toc = tic(); let data: string; if (isUrl(path)) { @@ -23,7 +23,7 @@ export async function loadCitations(session: ISession, path: string): Promise { + if (author.name === undefined) { + return author; + } + const { name, ...rest } = author; + return { literal: name, ...rest }; + }; + const { author, ...rest } = data; + + if (author === undefined) { + return data; + } + return { author: author.map(patchAuthor), ...rest }; +} + /** * Fetch bibtex entry for doi from doi.org using application/x-bibtex accept header */ -export async function getDoiOrgBibtex( - session: ISession, - doiString: string, -): Promise { +export async function getDoiOrgJSON(session: ISession, doiString: string): Promise { const normalizedDoi = doi.normalize(doiString); const url = doi.buildUrl(normalizedDoi); if (!doi.validate(doiString) || !normalizedDoi || !url) return null; const cachePath = doiBibtexCacheFile(session, normalizedDoi); + if (fs.existsSync(cachePath)) { - const bibtex = fs.readFileSync(cachePath).toString(); - session.log.debug(`Loaded cached reference bibtex for doi:${normalizedDoi}`); - return bibtex; + const cached = fs.readFileSync(cachePath).toString(); + session.log.debug(`Loaded cached reference CSL JSON for doi:${normalizedDoi}`); + return JSON.parse(cached); } const toc = tic(); - session.log.debug('Fetching DOI bibtex from doi.org'); + session.log.debug('Fetching DOI CSL JSON from doi.org'); const response = await session .fetch(url, { - headers: [['Accept', 'application/x-bibtex']], + headers: [['Accept', DEFAULT_DOI_CONTENT]], }) .catch(() => { session.log.debug(`Request to ${url} failed.`); @@ -57,11 +73,15 @@ export async function getDoiOrgBibtex( session.log.debug(`doi.org fetch failed for ${doiString}`); return null; } - const bibtex = await response.text(); - session.log.debug(toc(`Fetched reference bibtex for doi:${normalizedDoi} in %s`)); - session.log.debug(`Saving doi bibtex to cache ${cachePath}`); - fs.writeFileSync(cachePath, bibtex); - return bibtex; + let data = (await response.json()) as object; + + // Some CSL publishers don't seem to conform + data = patchCrossCiteCSL(data); + + session.log.debug(toc(`Fetched reference CSL JSON for doi:${normalizedDoi} in %s`)); + session.log.debug(`Saving DOI CSL JSON to cache ${cachePath}`); + fs.writeFileSync(cachePath, JSON.stringify(data)); + return data as unknown as CitationJson; } /** @@ -99,14 +119,14 @@ export async function getCitation( node: GenericNode, ): Promise { if (!doi.validate(doiString)) return null; - const bibtex = await getDoiOrgBibtex(session, doiString); - if (!bibtex) { + const data = await getDoiOrgJSON(session, doiString); + if (!data) { const resolves = await doiOrgResolves(session, doiString); const normalizedDoi = doi.normalize(doiString); let message: string; let note: string | undefined; if (resolves) { - message = `No bibtex available from doi.org for doi:${normalizedDoi}`; + message = `No CSL JSON available from doi.org for doi:${normalizedDoi}`; note = `To resolve this error, visit ${doi.buildUrl(normalizedDoi)} and add citation info to .bib file`; } else { message = `Could not find DOI from link: ${doiString} as ${normalizedDoi}`; @@ -118,7 +138,7 @@ export async function getCitation( }); return null; } - const renderer = await getCitations(bibtex); + const renderer = await getCitationRenderers(data); const id = Object.keys(renderer)[0]; const render = renderer[id]; return { id, render };