From 30c17c03a9b8030e072ff2dc05e99121639a8969 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Mon, 8 Apr 2024 15:47:15 +0100 Subject: [PATCH 01/21] feat: support CSL-JSON from doi.org too --- packages/citation-js-utils/src/index.ts | 66 +++++--- .../types/citation-js/index.d.ts | 8 +- packages/myst-cli/src/process/citations.ts | 7 +- packages/myst-cli/src/process/file.ts | 4 +- packages/myst-cli/src/transforms/dois.ts | 151 ++++++++++++++---- packages/myst-to-jats/src/backmatter.ts | 4 +- 6 files changed, 173 insertions(+), 67 deletions(-) diff --git a/packages/citation-js-utils/src/index.ts b/packages/citation-js-utils/src/index.ts index 40448b217..2cf788cfd 100644 --- a/packages/citation-js-utils/src/index.ts +++ b/packages/citation-js-utils/src/index.ts @@ -8,10 +8,10 @@ import '@citation-js/plugin-csl'; const DOI_IN_TEXT = /(10.\d{4,9}\/[-._;()/:A-Z0-9]*[A-Z0-9])/i; // This is duplicated in citation-js types, which are not exported -export type CitationJson = { +export type CSL = { type?: 'article-journal' | string; id: string; - author?: { given: string; family: string }[]; + author?: { given: string; family: string; literal?: string }[]; issued?: { 'date-parts'?: number[][]; literal?: string }; publisher?: string; title?: string; @@ -47,14 +47,6 @@ function cleanRef(citation: string) { return cleanHtml.replace(/^1\./g, '').replace(/&/g, '&').trim(); } -// eslint-disable-next-line @typescript-eslint/no-unused-vars -const defaultOpts: OutputOptions = { - format: 'string', - type: 'json', - style: 'ris', - lang: 'en-US', -}; - export enum CitationJSStyles { 'apa' = 'citation-apa', 'vancouver' = 'citation-vancouver', @@ -66,14 +58,13 @@ export enum InlineCite { 't' = 't', } -const defaultString: OutputOptions = { - format: 'string', +const defaultOptions: Record = { lang: 'en-US', type: 'html', style: CitationJSStyles.apa, }; -export function yearFromCitation(data: CitationJson) { +export function yearFromCitation(data: CSL) { let year: number | string | undefined = data.issued?.['date-parts']?.[0]?.[0]; if (year) return year; year = data.issued?.['literal']?.match(/\b[12][0-9]{3}\b/)?.[0]; @@ -81,7 +72,7 @@ export function yearFromCitation(data: CitationJson) { return 'n.d.'; } -export function getInlineCitation(data: CitationJson, kind: InlineCite, opts?: InlineOptions) { +export function getInlineCitation(data: CSL, kind: InlineCite, opts?: InlineOptions) { let authors = data.author; if (!authors || authors.length === 0) { authors = data.editor; @@ -112,7 +103,7 @@ export function getInlineCitation(data: CitationJson, kind: InlineCite, opts?: I } if (authors.length > 2) { return [ - { type: 'text', value: `${prefix}${authors[0].family} ` }, + { type: 'text', value: `${prefix}${authors[0].family ?? authors[0].literal} ` }, { type: 'emphasis', children: [{ type: 'text', value: 'et al.' }] }, { type: 'text', value: `${yearPart}` }, ]; @@ -129,7 +120,7 @@ export type CitationRenderer = Record< inline: (kind?: InlineCite, opts?: InlineOptions) => InlineNode[]; getDOI: () => string | undefined; getURL: () => string | undefined; - cite: CitationJson; + cite: CSL; } >; @@ -171,12 +162,26 @@ export function firstNonDoiUrl(str?: string, doi?: string) { return matches.map((match) => match[0]).find((match) => !doi || !match.includes(doi)); } -export async function getCitations(bibtex: string): Promise { - const cite = new Cite(); - const p = await Cite.async(bibtex); +function parseCitationStyle(style: string): string { + const [styleType, styleFormat] = style.split('-'); + if (styleType !== 'citation') { + throw new Error(`unexpected citation style: ${style}`); + } + return styleFormat; +} +export async function parseBibTeX(source: string): Promise { + return (await Cite.async(source)).data; +} + +export async function parseCSLJSON(source: object[]): Promise { + return (await Cite.async(source)).data; +} + +export async function getCitationRenderers(data: CSL[]): Promise { + const cite = new Cite(); return Object.fromEntries( - p.data.map((c: any): [string, CitationRenderer[0]] => { + data.map((c): [string, CitationRenderer[0]] => { const matchDoi = c.URL?.match(DOI_IN_TEXT) ?? c.note?.match(DOI_IN_TEXT); if (!c.DOI && matchDoi) { c.DOI = matchDoi[0]; @@ -189,7 +194,13 @@ export async function getCitations(bibtex: string): Promise { }, render(style?: CitationJSStyles) { return replaceUrlsWithAnchorElement( - cleanRef(cite.set(c).get({ ...defaultString, style: style ?? CitationJSStyles.apa })), + cleanRef( + cite.set(c).format('bibliography', { + template: parseCitationStyle(style ?? (CitationJSStyles.apa as string)), + format: 'html', + lang: 'en-US', + }) as string, + ), c.DOI, ); }, @@ -197,7 +208,18 @@ export async function getCitations(bibtex: string): Promise { return c.DOI || undefined; }, getURL(): string | undefined { - return firstNonDoiUrl(cleanRef(cite.set(c).get(defaultString)), c.DOI) ?? doiUrl(c.DOI); + return ( + firstNonDoiUrl( + cleanRef( + cite.set(c).format('bibliography', { + template: parseCitationStyle(CitationJSStyles.apa as string), + format: 'html', + lang: 'en-US', + }) as string, + ), + c.DOI, + ) ?? doiUrl(c.DOI) + ); }, cite: c, }, diff --git a/packages/citation-js-utils/types/citation-js/index.d.ts b/packages/citation-js-utils/types/citation-js/index.d.ts index b7233b313..f78ec82f6 100644 --- a/packages/citation-js-utils/types/citation-js/index.d.ts +++ b/packages/citation-js-utils/types/citation-js/index.d.ts @@ -38,13 +38,13 @@ declare module '@citation-js/core' { } & Record; export class Cite { - constructor(input?: string | CSL); + constructor(input?: any); - static async(data: string | Cite): Promise; + static async(data: any): Promise; - set(data: string | Cite): this; + set(data: any): this; - get: (opts: OutputOptions) => string; + format: (format: string, options: Any) => string | object[]; data: CSL[]; } diff --git a/packages/myst-cli/src/process/citations.ts b/packages/myst-cli/src/process/citations.ts index 824a5e009..c43b18d3f 100644 --- a/packages/myst-cli/src/process/citations.ts +++ b/packages/myst-cli/src/process/citations.ts @@ -1,6 +1,6 @@ import fs from 'node:fs'; import type { CitationRenderer } from 'citation-js-utils'; -import { getCitations } from 'citation-js-utils'; +import { getCitationRenderers, parseBibTeX } from 'citation-js-utils'; import { tic, isUrl } from 'myst-cli-utils'; import { RuleId, plural } from 'myst-common'; import type { ISession, ISessionWithCache } from '../session/types.js'; @@ -8,7 +8,7 @@ import { castSession } from '../session/cache.js'; import { selectors } from '../store/index.js'; import { addWarningForFile } from '../utils/addWarningForFile.js'; -export async function loadCitations(session: ISession, path: string): Promise { +export async function loadBibTeXCitationRenderers(session: ISession, path: string): Promise { const toc = tic(); let data: string; if (isUrl(path)) { @@ -23,7 +23,8 @@ export async function loadCitations(session: ISession, path: string): Promise { - const normalizedDoi = doi.normalize(doiString); - const url = doi.buildUrl(doiString); // This must be based on the incoming string, not the normalizedDoi. (e.g. short DOIs) - if (!doi.validate(doiString) || !normalizedDoi || !url) return null; - const cachePath = doiBibtexCacheFile(session, normalizedDoi); - if (fs.existsSync(cachePath)) { - const bibtex = fs.readFileSync(cachePath).toString(); - session.log.debug(`Loaded cached reference bibtex for doi:${normalizedDoi}`); - return bibtex; + url: string, +): Promise { + session.log.debug('Fetching DOI BibTeX from doi.org'); + const response = await session + .fetch(url, { + headers: [['Accept', BIBTEX_MIMETYPE]], + }) + .catch(() => { + session.log.debug(`Request to ${url} failed.`); + return undefined; + }); + if (!response || !response.ok) { + session.log.debug(`doi.org fetch failed for ${url}`); + return undefined; } - const toc = tic(); - session.log.debug('Fetching DOI bibtex from doi.org'); + let data = await response.text(); + return parseBibTeX(data); +} + +/** + * Resolve the given doi.org DOI URL into its CSL-JSON metadata + * + * @param session - CLI session + * @param url - doi.org DOI URL + */ + +export async function resolveDoiAsCSLJSON( + session: ISession, + url: string, +): Promise { + session.log.debug('Fetching DOI CSL JSON from doi.org'); const response = await session .fetch(url, { - headers: [['Accept', 'application/x-bibtex']], + headers: [['Accept', CSL_JSON_MIMETYPE]], }) .catch(() => { session.log.debug(`Request to ${url} failed.`); - return null; + return undefined; }); if (!response || !response.ok) { - session.log.debug(`doi.org fetch failed for ${doiString}`); - return null; + session.log.debug(`doi.org fetch failed for ${url}`); + return undefined; + } + let data = await response.json(); + // Return parse result of _array_ of CSL items + return parseCSLJSON([data as object]); +} + +/** + * Fetch CSL-JSON formatted metadata for the given doi.org DOI + * + * @param session - CLI session + * @param doiString - DOI + * @param vfile + * @param node + */ +export async function resolveDoiOrg( + session: ISession, + doiString: string, + vfile: VFile, + node: GenericNode, +): Promise { + const normalizedDoi = doi.normalize(doiString); + const url = doi.buildUrl(doiString); // This must be based on the incoming string, not the normalizedDoi. (e.g. short DOIs) + if (!doi.validate(doiString) || !normalizedDoi || !url) return undefined; + + // Cache DOI resolution as CSL JSON (parsed) + const cachePath = doiCSLJSONCacheFile(session, normalizedDoi); + + if (fs.existsSync(cachePath)) { + const cached = fs.readFileSync(cachePath).toString(); + session.log.debug(`Loaded cached reference CSL-JSON for doi:${normalizedDoi}`); + return JSON.parse(cached); + } + const toc = tic(); + + let data: CSL[] | undefined; + try { + data = await resolveDoiAsBibTeX(session, url); + session.log.debug(toc(`Fetched reference BibTeX for doi:${normalizedDoi} in %s`)); + } catch (error) { + fileWarn(vfile, `BibTeX from doi.org was malformed, trying CSL-JSON`); + + try { + data = await resolveDoiAsCSLJSON(session, url); + session.log.debug(toc(`Fetched reference CSL-JSON for doi:${normalizedDoi} in %s`)); + } catch (error) { + fileError( + vfile, + `BibTeX and CSL-JSON from doi.org was malformed, please edit and add to your local references`, + { + node, + ruleId: RuleId.doiLinkValid, + }, + ); + return undefined; + } } - const bibtex = await response.text(); - session.log.debug(toc(`Fetched reference bibtex for doi:${normalizedDoi} in %s`)); - session.log.debug(`Saving doi bibtex to cache ${cachePath}`); - fs.writeFileSync(cachePath, bibtex); - return bibtex; + session.log.debug(`Saving DOI CSL-JSON to cache ${cachePath}`); + fs.writeFileSync(cachePath, JSON.stringify(data)); + return data as unknown as CSL[]; } /** - * Fetch doi from doi.org to see if it resolves + * Fetch DOI from doi.org to see if it resolves */ export async function doiOrgResolves(session: ISession, doiString: string): Promise { const normalizedDoi = doi.normalize(doiString); @@ -99,14 +182,14 @@ export async function getCitation( node: GenericNode, ): Promise { if (!doi.validate(doiString)) return null; - const bibtex = await getDoiOrgBibtex(session, doiString); - if (!bibtex) { + const data = await resolveDoiOrg(session, doiString, vfile, node); + if (!data) { const resolves = await doiOrgResolves(session, doiString); const normalizedDoi = doi.normalize(doiString); let message: string; let note: string | undefined; if (resolves) { - message = `No bibtex available from doi.org for doi:${normalizedDoi}`; + message = `No CSL JSON available from doi.org for doi:${normalizedDoi}`; note = `To resolve this error, visit ${doi.buildUrl(normalizedDoi)} and add citation info to .bib file`; } else { message = `Could not find DOI from link: ${doiString} as ${normalizedDoi}`; @@ -119,18 +202,18 @@ export async function getCitation( return null; } try { - const renderer = await getCitations(bibtex); + const renderer = await getCitationRenderers(data); const id = Object.keys(renderer)[0]; const render = renderer[id]; return { id, render }; } catch (error) { fileError( vfile, - `BibTeX from doi.org was malformed, please edit and add to your local references`, + `Citation data from doi.org was malformed, please edit and add to your local references`, { node, ruleId: RuleId.doiLinkValid, - note: `\nBibTeX from ${doiString}:\n\n${bibtex}\n`, + note: `\Citation data from ${doiString}:\n\n${JSON.stringify(data)}\n`, }, ); return null; diff --git a/packages/myst-to-jats/src/backmatter.ts b/packages/myst-to-jats/src/backmatter.ts index 1bbbeb27e..adebfa102 100644 --- a/packages/myst-to-jats/src/backmatter.ts +++ b/packages/myst-to-jats/src/backmatter.ts @@ -1,7 +1,7 @@ -import type { CitationRenderer, CitationJson } from 'citation-js-utils'; +import type { CitationRenderer, CSL } from 'citation-js-utils'; import type { Element, IJatsSerializer } from './types.js'; -export function citeToJatsRef(state: IJatsSerializer, key: string, data: CitationJson): Element { +export function citeToJatsRef(state: IJatsSerializer, key: string, data: CSL): Element { const publicationType = !data.type || data.type === 'article-journal' ? 'journal' : data.type; const elements: Element[] = []; const authors: Element[] | undefined = data.author From 0cfc14645c76a3d6501f724d1c58cc4a6e67c337 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 9 Apr 2024 13:59:53 +0100 Subject: [PATCH 02/21] chore: appease linter --- packages/myst-cli/src/process/citations.ts | 7 +++++-- packages/myst-cli/src/transforms/dois.ts | 8 ++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/packages/myst-cli/src/process/citations.ts b/packages/myst-cli/src/process/citations.ts index c43b18d3f..bf1ef4039 100644 --- a/packages/myst-cli/src/process/citations.ts +++ b/packages/myst-cli/src/process/citations.ts @@ -8,7 +8,10 @@ import { castSession } from '../session/cache.js'; import { selectors } from '../store/index.js'; import { addWarningForFile } from '../utils/addWarningForFile.js'; -export async function loadBibTeXCitationRenderers(session: ISession, path: string): Promise { +export async function loadBibTeXCitationRenderers( + session: ISession, + path: string, +): Promise { const toc = tic(); let data: string; if (isUrl(path)) { @@ -23,7 +26,7 @@ export async function loadBibTeXCitationRenderers(session: ISession, path: strin session.log.debug(`Loading citations at "${path}"`); data = fs.readFileSync(path).toString(); } - const csl = await parseBibTeX(data); + const csl = await parseBibTeX(data); const renderer = await getCitationRenderers(csl); session.log.debug(toc(`Read ${plural('%s citations(s)', renderer)} from ${path} in %s.`)); return renderer; diff --git a/packages/myst-cli/src/transforms/dois.ts b/packages/myst-cli/src/transforms/dois.ts index b090fd56f..772a5386f 100644 --- a/packages/myst-cli/src/transforms/dois.ts +++ b/packages/myst-cli/src/transforms/dois.ts @@ -59,7 +59,7 @@ export async function resolveDoiAsBibTeX( session.log.debug(`doi.org fetch failed for ${url}`); return undefined; } - let data = await response.text(); + const data = await response.text(); return parseBibTeX(data); } @@ -87,7 +87,7 @@ export async function resolveDoiAsCSLJSON( session.log.debug(`doi.org fetch failed for ${url}`); return undefined; } - let data = await response.json(); + const data = await response.json(); // Return parse result of _array_ of CSL items return parseCSLJSON([data as object]); } @@ -130,7 +130,7 @@ export async function resolveDoiOrg( try { data = await resolveDoiAsCSLJSON(session, url); session.log.debug(toc(`Fetched reference CSL-JSON for doi:${normalizedDoi} in %s`)); - } catch (error) { + } catch (errorCSL) { fileError( vfile, `BibTeX and CSL-JSON from doi.org was malformed, please edit and add to your local references`, @@ -213,7 +213,7 @@ export async function getCitation( { node, ruleId: RuleId.doiLinkValid, - note: `\Citation data from ${doiString}:\n\n${JSON.stringify(data)}\n`, + note: `Citation data from ${doiString}:\n\n${JSON.stringify(data)}\n`, }, ); return null; From 15e4231009ad5260e9a8054f91dba02f82ee955b Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 9 Apr 2024 14:04:54 +0100 Subject: [PATCH 03/21] test: fix test suite --- packages/citation-js-utils/tests/basic.spec.ts | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/packages/citation-js-utils/tests/basic.spec.ts b/packages/citation-js-utils/tests/basic.spec.ts index d5bc1e659..f7f1a8687 100644 --- a/packages/citation-js-utils/tests/basic.spec.ts +++ b/packages/citation-js-utils/tests/basic.spec.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from 'vitest'; -import { getCitations, CitationJSStyles, yearFromCitation, firstNonDoiUrl } from '../src'; +import { getCitationRenderers, parseBibTeX, CitationJSStyles, yearFromCitation, firstNonDoiUrl } from '../src'; import { bibtex, doiInNote, @@ -13,7 +13,8 @@ const key = 'Cockett2015SimPEG'; describe('Test reference rendering', () => { it('APA', async () => { - const citations = await getCitations(bibtex); + const data = await parseBibTeX(bibtex); + const citations = await getCitationRenderers(data); expect(Object.keys(citations).length).toBe(1); const cite = citations[key]; expect(cite.render()).toEqual(TEST_APA_HTML); @@ -21,15 +22,17 @@ describe('Test reference rendering', () => { expect(cite.getDOI()).toEqual('10.1016/j.cageo.2015.09.015'); }); it('Vancouver', async () => { - const citations = await getCitations(bibtex); + const data = await parseBibTeX(bibtex); + const citations = await getCitationRenderers(data); const cite = citations[key]; expect(cite.render(CitationJSStyles.vancouver)).toEqual(TEST_VANCOUVER_HTML); }); it.each([ ['url', doiInURL], ['note', doiInNote], - ])('Extract the DOI from the %s', async (_, src) => { - const citations = await getCitations(src); + ])('Extract the DOI from the %s', async (_, src) => { + const data = await parseBibTeX(src); + const citations = await getCitationRenderers(data); expect(citations['cury2020sparse'].getDOI()).toBe(TEST_DOI_IN_OTHER_FIELD); }); }); From f7055b008c2f52ca66dcd1b7f54910b42e17ad73 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 9 Apr 2024 14:06:14 +0100 Subject: [PATCH 04/21] chore: changeset --- .changeset/many-pianos-approve.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .changeset/many-pianos-approve.md diff --git a/.changeset/many-pianos-approve.md b/.changeset/many-pianos-approve.md new file mode 100644 index 000000000..20003ce11 --- /dev/null +++ b/.changeset/many-pianos-approve.md @@ -0,0 +1,7 @@ +--- +"citation-js-utils": major +"myst-to-jats": patch +"myst-cli": patch +--- + +Load citations from CSL and non-CSL From 86ade98638bd8b579b180814dba84383b1535ab1 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 9 Apr 2024 15:04:20 +0100 Subject: [PATCH 05/21] fix: run built-in cleanup --- packages/citation-js-utils/src/index.ts | 3 +- .../citation-js-utils/tests/basic.spec.ts | 10 ++++- .../types/citation-js/index.d.ts | 3 ++ packages/myst-cli/src/transforms/doi.spec.ts | 38 ++++++++++++++----- 4 files changed, 41 insertions(+), 13 deletions(-) diff --git a/packages/citation-js-utils/src/index.ts b/packages/citation-js-utils/src/index.ts index 2cf788cfd..d189e2792 100644 --- a/packages/citation-js-utils/src/index.ts +++ b/packages/citation-js-utils/src/index.ts @@ -1,5 +1,6 @@ import type { OutputOptions } from '@citation-js/core'; import { Cite } from '@citation-js/core'; +import { clean as cleanCSL } from '@citation-js/core/lib/plugins/input/csl.js'; import sanitizeHtml from 'sanitize-html'; import '@citation-js/plugin-bibtex'; @@ -175,7 +176,7 @@ export async function parseBibTeX(source: string): Promise { } export async function parseCSLJSON(source: object[]): Promise { - return (await Cite.async(source)).data; + return Promise.resolve(cleanCSL(source)); } export async function getCitationRenderers(data: CSL[]): Promise { diff --git a/packages/citation-js-utils/tests/basic.spec.ts b/packages/citation-js-utils/tests/basic.spec.ts index f7f1a8687..f8608267b 100644 --- a/packages/citation-js-utils/tests/basic.spec.ts +++ b/packages/citation-js-utils/tests/basic.spec.ts @@ -1,5 +1,11 @@ import { describe, expect, it } from 'vitest'; -import { getCitationRenderers, parseBibTeX, CitationJSStyles, yearFromCitation, firstNonDoiUrl } from '../src'; +import { + getCitationRenderers, + parseBibTeX, + CitationJSStyles, + yearFromCitation, + firstNonDoiUrl, +} from '../src'; import { bibtex, doiInNote, @@ -30,7 +36,7 @@ describe('Test reference rendering', () => { it.each([ ['url', doiInURL], ['note', doiInNote], - ])('Extract the DOI from the %s', async (_, src) => { + ])('Extract the DOI from the %s', async (_, src) => { const data = await parseBibTeX(src); const citations = await getCitationRenderers(data); expect(citations['cury2020sparse'].getDOI()).toBe(TEST_DOI_IN_OTHER_FIELD); diff --git a/packages/citation-js-utils/types/citation-js/index.d.ts b/packages/citation-js-utils/types/citation-js/index.d.ts index f78ec82f6..2582f7352 100644 --- a/packages/citation-js-utils/types/citation-js/index.d.ts +++ b/packages/citation-js-utils/types/citation-js/index.d.ts @@ -2,6 +2,9 @@ // https://fettblog.eu/typescript-react-extending-jsx-elements/ declare module '@citation-js/plugin-bibtex' {} declare module '@citation-js/plugin-csl' {} +declare module '@citation-js/core/lib/plugins/input/csl.js' { + export function clean(data: any): any {} +} declare module '@citation-js/core' { export type OutputOptions = { format: 'string'; diff --git a/packages/myst-cli/src/transforms/doi.spec.ts b/packages/myst-cli/src/transforms/doi.spec.ts index 4eb98e167..cde819fa9 100644 --- a/packages/myst-cli/src/transforms/doi.spec.ts +++ b/packages/myst-cli/src/transforms/doi.spec.ts @@ -1,26 +1,44 @@ import { describe, expect, it } from 'vitest'; import { Session } from '../session'; -import { getDoiOrgBibtex } from './dois'; +import { resolveDoiAsBibTeX, resolveDoiAsCSLJSON } from './dois'; -const PRIESTLEY_1972 = - '@article{PRIESTLEY_1972, title={On the Assessment of Surface Heat Flux and Evaporation Using Large-Scale Parameters}, volume={100}, ISSN={1520-0493}, url={http://dx.doi.org/10.1175/1520-0493(1972)100<0081:OTAOSH>2.3.CO;2}, DOI={10.1175/1520-0493(1972)100<0081:otaosh>2.3.co;2}, number={2}, journal={Monthly Weather Review}, publisher={American Meteorological Society}, author={PRIESTLEY, C. H. B. and TAYLOR, R. J.}, year={1972}, month=feb, pages={81–92} }'; -describe('DOI Resolvers', () => { +const PRIESTLEY_1972_CSL_JSON = [ + { + 'container-title': 'Monthly Weather Review', + author: [ + { given: 'C. H. B.', family: 'PRIESTLEY' }, + { given: 'R. J.', family: 'TAYLOR' }, + ], + DOI: '10.1175/1520-0493(1972)100<0081:otaosh>2.3.co;2', + type: 'article-journal', + issue: '2', + issued: { 'date-parts': [[1972, 2]] }, + page: '81-92', + publisher: 'American Meteorological Society', + title: 'On the Assessment of Surface Heat Flux and Evaporation Using Large-Scale Parameters', + volume: '100', + }, +]; +describe.each([ + { resolver: resolveDoiAsBibTeX, name: 'BibTeX' }, + { resolver: resolveDoiAsCSLJSON, name: 'CSL-JSON' }, +])('DOI Resolvers for $name', ({ resolver, name }) => { it('short DOI resolves', async () => { - const bibtex = await getDoiOrgBibtex(new Session(), 'https://doi.org/cr3qwn'); - expect(bibtex?.trim()).toEqual(PRIESTLEY_1972); + const data = await resolver(new Session(), 'https://doi.org/cr3qwn'); + expect(data).toMatchObject(PRIESTLEY_1972_CSL_JSON); }); it('url encoded DOI resolves', async () => { - const bibtex = await getDoiOrgBibtex( + const data = await resolver( new Session(), 'https://doi.org/10.1175%2F1520-0493%281972%29100%3C0081%3AOTAOSH%3E2.3.CO%3B2', ); - expect(bibtex?.trim()).toEqual(PRIESTLEY_1972); + expect(data).toMatchObject(PRIESTLEY_1972_CSL_JSON); }); it('markdown link with strange characters resolves', async () => { - const bibtex = await getDoiOrgBibtex( + const data = await resolver( new Session(), 'https://doi.org/10.1175/1520-0493(1972)100<0081:OTAOSH>2.3.CO;2', ); - expect(bibtex?.trim()).toEqual(PRIESTLEY_1972); + expect(data).toMatchObject(PRIESTLEY_1972_CSL_JSON); }); }); From 61cd46975e7a82e25bf6c81ed7e3aa9ade5c8d12 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 9 Apr 2024 15:20:42 +0100 Subject: [PATCH 06/21] refactor: rename DOI function --- packages/myst-cli/src/transforms/doi.spec.ts | 6 +++--- packages/myst-cli/src/transforms/dois.ts | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/myst-cli/src/transforms/doi.spec.ts b/packages/myst-cli/src/transforms/doi.spec.ts index cde819fa9..9d0138e95 100644 --- a/packages/myst-cli/src/transforms/doi.spec.ts +++ b/packages/myst-cli/src/transforms/doi.spec.ts @@ -1,6 +1,6 @@ import { describe, expect, it } from 'vitest'; import { Session } from '../session'; -import { resolveDoiAsBibTeX, resolveDoiAsCSLJSON } from './dois'; +import { resolveDOIAsBibTeX, resolveDOIAsCSLJSON } from './dois'; const PRIESTLEY_1972_CSL_JSON = [ { @@ -20,8 +20,8 @@ const PRIESTLEY_1972_CSL_JSON = [ }, ]; describe.each([ - { resolver: resolveDoiAsBibTeX, name: 'BibTeX' }, - { resolver: resolveDoiAsCSLJSON, name: 'CSL-JSON' }, + { resolver: resolveDOIAsBibTeX, name: 'BibTeX' }, + { resolver: resolveDOIAsCSLJSON, name: 'CSL-JSON' }, ])('DOI Resolvers for $name', ({ resolver, name }) => { it('short DOI resolves', async () => { const data = await resolver(new Session(), 'https://doi.org/cr3qwn'); diff --git a/packages/myst-cli/src/transforms/dois.ts b/packages/myst-cli/src/transforms/dois.ts index 772a5386f..47d87ee05 100644 --- a/packages/myst-cli/src/transforms/dois.ts +++ b/packages/myst-cli/src/transforms/dois.ts @@ -42,7 +42,7 @@ function doiResolvesCacheFile(session: ISession, normalizedDoi: string) { * @param session - CLI session * @param url - doi.org DOI URL */ -export async function resolveDoiAsBibTeX( +export async function resolveDOIAsBibTeX( session: ISession, url: string, ): Promise { @@ -70,7 +70,7 @@ export async function resolveDoiAsBibTeX( * @param url - doi.org DOI URL */ -export async function resolveDoiAsCSLJSON( +export async function resolveDOIAsCSLJSON( session: ISession, url: string, ): Promise { @@ -122,13 +122,13 @@ export async function resolveDoiOrg( let data: CSL[] | undefined; try { - data = await resolveDoiAsBibTeX(session, url); + data = await resolveDOIAsBibTeX(session, url); session.log.debug(toc(`Fetched reference BibTeX for doi:${normalizedDoi} in %s`)); } catch (error) { fileWarn(vfile, `BibTeX from doi.org was malformed, trying CSL-JSON`); try { - data = await resolveDoiAsCSLJSON(session, url); + data = await resolveDOIAsCSLJSON(session, url); session.log.debug(toc(`Fetched reference CSL-JSON for doi:${normalizedDoi} in %s`)); } catch (errorCSL) { fileError( From d62481d2122577a872c143e8cca78ea629c959fc Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 9 Apr 2024 15:24:53 +0100 Subject: [PATCH 07/21] Update packages/myst-cli/src/transforms/dois.ts --- packages/myst-cli/src/transforms/dois.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/myst-cli/src/transforms/dois.ts b/packages/myst-cli/src/transforms/dois.ts index 47d87ee05..5c36e5e43 100644 --- a/packages/myst-cli/src/transforms/dois.ts +++ b/packages/myst-cli/src/transforms/dois.ts @@ -189,7 +189,7 @@ export async function getCitation( let message: string; let note: string | undefined; if (resolves) { - message = `No CSL JSON available from doi.org for doi:${normalizedDoi}`; + message = `No CSL-JSON available from doi.org for doi:${normalizedDoi}`; note = `To resolve this error, visit ${doi.buildUrl(normalizedDoi)} and add citation info to .bib file`; } else { message = `Could not find DOI from link: ${doiString} as ${normalizedDoi}`; From da1a9d39e1c2af24cc1b75205bca018f6a3a9496 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 10 Apr 2024 10:31:25 +0100 Subject: [PATCH 08/21] Update packages/myst-cli/src/transforms/dois.ts Co-authored-by: Franklin Koch --- packages/myst-cli/src/transforms/dois.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/myst-cli/src/transforms/dois.ts b/packages/myst-cli/src/transforms/dois.ts index 5c36e5e43..9f997d9a2 100644 --- a/packages/myst-cli/src/transforms/dois.ts +++ b/packages/myst-cli/src/transforms/dois.ts @@ -189,7 +189,7 @@ export async function getCitation( let message: string; let note: string | undefined; if (resolves) { - message = `No CSL-JSON available from doi.org for doi:${normalizedDoi}`; + message = `No citation data available from doi.org for doi:${normalizedDoi}`; note = `To resolve this error, visit ${doi.buildUrl(normalizedDoi)} and add citation info to .bib file`; } else { message = `Could not find DOI from link: ${doiString} as ${normalizedDoi}`; From 6f5f47f646b04c348f77d595290049343f15d51b Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 10 Apr 2024 09:44:09 +0100 Subject: [PATCH 09/21] chore: add docstrings --- packages/citation-js-utils/src/index.ts | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/packages/citation-js-utils/src/index.ts b/packages/citation-js-utils/src/index.ts index d189e2792..b33f9419c 100644 --- a/packages/citation-js-utils/src/index.ts +++ b/packages/citation-js-utils/src/index.ts @@ -163,6 +163,12 @@ export function firstNonDoiUrl(str?: string, doi?: string) { return matches.map((match) => match[0]).find((match) => !doi || !match.includes(doi)); } + +/** + * Parse a citation style of the form `citation-