Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 Request DOI metadata as CSL-JSON after BibTeX #1073

Merged
merged 21 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changeset/many-pianos-approve.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"citation-js-utils": minor
"myst-to-jats": patch
"myst-cli": patch
---

Load citations from CSL and non-CSL
5 changes: 5 additions & 0 deletions .changeset/ten-rockets-buy.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'mystmd': patch
---

Add end-to-end tests for various DOIs
113 changes: 86 additions & 27 deletions packages/citation-js-utils/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import type { OutputOptions } from '@citation-js/core';
import { Cite } from '@citation-js/core';
import { clean as cleanCSL } from '@citation-js/core/lib/plugins/input/csl.js';
import sanitizeHtml from 'sanitize-html';

import '@citation-js/plugin-bibtex';
Expand All @@ -8,10 +8,10 @@ import '@citation-js/plugin-csl';
/**
 * Matches a DOI embedded in free text (case-insensitive).
 *
 * The pattern is the literal directory indicator `10.`, a 4-9 digit
 * registrant code, a slash, and a suffix that must end in a letter or digit
 * so that trailing punctuation (e.g. a sentence-ending period) is not captured.
 * The dot after `10` is escaped; unescaped it would accept any character there.
 */
const DOI_IN_TEXT = /(10\.\d{4,9}\/[-._;()/:A-Z0-9]*[A-Z0-9])/i;

// This is duplicated in citation-js types, which are not exported
export type CitationJson = {
export type CSL = {
type?: 'article-journal' | string;
id: string;
author?: { given: string; family: string }[];
author?: { given: string; family: string; literal?: string }[];
issued?: { 'date-parts'?: number[][]; literal?: string };
publisher?: string;
title?: string;
Expand Down Expand Up @@ -47,14 +47,6 @@ function cleanRef(citation: string) {
return cleanHtml.replace(/^1\./g, '').replace(/&/g, '&').trim();
}

// Default citation-js output options (string format, JSON type, RIS style, en-US).
// Not referenced in the visible code, hence the lint suppression below.
// eslint-disable-next-line @typescript-eslint/no-unused-vars
const defaultOpts: OutputOptions = {
format: 'string',
type: 'json',
style: 'ris',
lang: 'en-US',
};

export enum CitationJSStyles {
'apa' = 'citation-apa',
'vancouver' = 'citation-vancouver',
Expand All @@ -66,22 +58,15 @@ export enum InlineCite {
't' = 't',
}

// Baseline output options for rendering a citation to a string:
// HTML output, APA style, en-US locale. Callers may override `style`.
const defaultString: OutputOptions = {
format: 'string',
lang: 'en-US',
type: 'html',
style: CitationJSStyles.apa,
};

export function yearFromCitation(data: CitationJson) {
/**
 * Determine the year to display for a citation.
 *
 * Lookup order:
 *  1. the first entry of the first `issued['date-parts']` tuple,
 *  2. the first standalone four-digit number starting with 1 or 2 found in
 *     `issued.literal`,
 *  3. the placeholder `'n.d.'` (no date).
 *
 * @param data - CSL item to inspect
 * @returns the year as a number (from date-parts), a string (from the
 *   literal date), or `'n.d.'` when neither yields a truthy value
 */
export function yearFromCitation(data: CSL) {
  const issued = data.issued;
  const fromDateParts: number | string | undefined = issued?.['date-parts']?.[0]?.[0];
  if (fromDateParts) return fromDateParts;
  const fromLiteral = issued?.['literal']?.match(/\b[12][0-9]{3}\b/)?.[0];
  return fromLiteral ? fromLiteral : 'n.d.';
}

export function getInlineCitation(data: CitationJson, kind: InlineCite, opts?: InlineOptions) {
export function getInlineCitation(data: CSL, kind: InlineCite, opts?: InlineOptions) {
let authors = data.author;
if (!authors || authors.length === 0) {
authors = data.editor;
Expand Down Expand Up @@ -112,7 +97,7 @@ export function getInlineCitation(data: CitationJson, kind: InlineCite, opts?: I
}
if (authors.length > 2) {
return [
{ type: 'text', value: `${prefix}${authors[0].family} ` },
{ type: 'text', value: `${prefix}${authors[0].family ?? authors[0].literal} ` },
{ type: 'emphasis', children: [{ type: 'text', value: 'et al.' }] },
{ type: 'text', value: `${yearPart}` },
];
Expand All @@ -129,7 +114,9 @@ export type CitationRenderer = Record<
inline: (kind?: InlineCite, opts?: InlineOptions) => InlineNode[];
getDOI: () => string | undefined;
getURL: () => string | undefined;
cite: CitationJson;
cite: CSL;
getLabel: () => string;
exportBibTeX: () => string;
}
>;

Expand Down Expand Up @@ -171,12 +158,58 @@ export function firstNonDoiUrl(str?: string, doi?: string) {
return matches.map((match) => match[0]).find((match) => !doi || !match.includes(doi));
}

/**
 * Parse a citation style of the form `citation-<style>` into its `<style>`.
 *
 * Everything after the `citation-` prefix is returned, so hyphenated style
 * names (e.g. `citation-chicago-author-date`) are preserved in full rather
 * than being cut at the second hyphen.
 *
 * @param style - citation style string, e.g. `citation-apa`
 * @returns the style name with the `citation-` prefix removed
 * @throws Error if `style` does not start with `citation-` or has nothing
 *   after the prefix
 */
function parseCitationStyle(style: string): string {
  const prefix = 'citation-';
  // Reject a missing prefix and an empty remainder alike; the previous
  // split-based parsing returned `undefined` / '' for those inputs.
  if (!style.startsWith(prefix) || style.length === prefix.length) {
    throw new Error(`unexpected citation style: ${style}`);
  }
  return style.slice(prefix.length);
}

/**
 * Convert BibTeX source text into CSL items.
 *
 * The text is handed to a new citation-js `Cite` instance and the items it
 * exposes on `data` are returned.
 *
 * @param source - BibTeX string
 * @returns the CSL items held by the `Cite` instance
 */
export function parseBibTeX(source: string): CSL[] {
  const parsed = new Cite(source);
  return parsed.data;
}

/**
 * Normalize raw CSL-JSON items by running them through citation-js's CSL
 * `clean` routine.
 *
 * @param source - array of unclean CSL items
 * @returns the items as returned by `clean`
 */
export function parseCSLJSON(source: object[]): CSL[] {
  const cleaned: CSL[] = cleanCSL(source);
  return cleaned;
}

/**
 * Compatibility shim for existing callers of getCitations.
 *
 * Parses the BibTeX text with `parseBibTeX` and builds the renderers with
 * `getCitationRenderers`; nothing else is done here.
 *
 * @deprecated Replaced by getCitationRenderers
 * @param bibtex - BibTeX string
 * @returns the renderers produced by `getCitationRenderers` for the parsed items
 */
export async function getCitations(bibtex: string): Promise<CitationRenderer> {
  // The BibTeX is parsed exactly once; the former `new Cite()` /
  // `Cite.async(bibtex)` locals were unused and parsed the input a second time.
  const csl = parseBibTeX(bibtex);
  return getCitationRenderers(csl);
}

/**
* Build renderers for the given array of CSL items
*
* @param data - array of CSL items
*/
export async function getCitationRenderers(data: CSL[]): Promise<CitationRenderer> {
const cite = new Cite();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now this function creates renderers for CSL (i.e. offloads parsing)

return Object.fromEntries(
p.data.map((c: any): [string, CitationRenderer[0]] => {
data.map((c): [string, CitationRenderer[0]] => {
const matchDoi = c.URL?.match(DOI_IN_TEXT) ?? c.note?.match(DOI_IN_TEXT);
if (!c.DOI && matchDoi) {
c.DOI = matchDoi[0];
Expand All @@ -189,17 +222,43 @@ export async function getCitations(bibtex: string): Promise<CitationRenderer> {
},
render(style?: CitationJSStyles) {
return replaceUrlsWithAnchorElement(
cleanRef(cite.set(c).get({ ...defaultString, style: style ?? CitationJSStyles.apa })),
cleanRef(
cite.set(c).format('bibliography', {
template: parseCitationStyle(style ?? (CitationJSStyles.apa as string)),
agoose77 marked this conversation as resolved.
Show resolved Hide resolved
format: 'html',
lang: 'en-US',
}) as string,
),
c.DOI,
);
},
getDOI(): string | undefined {
return c.DOI || undefined;
},
getURL(): string | undefined {
return firstNonDoiUrl(cleanRef(cite.set(c).get(defaultString)), c.DOI) ?? doiUrl(c.DOI);
return (
firstNonDoiUrl(
cleanRef(
cite.set(c).format('bibliography', {
template: parseCitationStyle(CitationJSStyles.apa as string),
format: 'html',
lang: 'en-US',
}) as string,
),
c.DOI,
) ?? doiUrl(c.DOI)
);
},
cite: c,
getLabel(): string {
const bibtexObjects = cite.set(c).format('bibtex', { format: 'object' }) as {
label: string;
}[];
return bibtexObjects[0]?.label;
},
exportBibTeX(): string {
return cite.set(c).format('bibtex', { format: 'text' }) as string;
},
},
];
}),
Expand Down
17 changes: 13 additions & 4 deletions packages/citation-js-utils/tests/basic.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
import { describe, expect, it } from 'vitest';
import { getCitations, CitationJSStyles, yearFromCitation, firstNonDoiUrl } from '../src';
import {
getCitationRenderers,
parseBibTeX,
CitationJSStyles,
yearFromCitation,
firstNonDoiUrl,
} from '../src';
import {
bibtex,
doiInNote,
Expand All @@ -13,23 +19,26 @@ const key = 'Cockett2015SimPEG';

describe('Test reference rendering', () => {
it('APA', async () => {
const citations = await getCitations(bibtex);
const data = parseBibTeX(bibtex);
const citations = await getCitationRenderers(data);
expect(Object.keys(citations).length).toBe(1);
const cite = citations[key];
expect(cite.render()).toEqual(TEST_APA_HTML);
expect(cite.render(CitationJSStyles.apa)).toEqual(TEST_APA_HTML);
expect(cite.getDOI()).toEqual('10.1016/j.cageo.2015.09.015');
});
it('Vancouver', async () => {
const citations = await getCitations(bibtex);
const data = parseBibTeX(bibtex);
const citations = await getCitationRenderers(data);
const cite = citations[key];
expect(cite.render(CitationJSStyles.vancouver)).toEqual(TEST_VANCOUVER_HTML);
});
it.each([
['url', doiInURL],
['note', doiInNote],
])('Extract the DOI from the %s', async (_, src) => {
const citations = await getCitations(src);
const data = parseBibTeX(src);
const citations = await getCitationRenderers(data);
expect(citations['cury2020sparse'].getDOI()).toBe(TEST_DOI_IN_OTHER_FIELD);
});
});
Expand Down
11 changes: 7 additions & 4 deletions packages/citation-js-utils/types/citation-js/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
// https://fettblog.eu/typescript-react-extending-jsx-elements/
declare module '@citation-js/plugin-bibtex' {}
declare module '@citation-js/plugin-csl' {}
declare module '@citation-js/core/lib/plugins/input/csl.js' {
  /**
   * citation-js CSL cleaning routine (untyped upstream, hence `any`).
   *
   * Declared as a signature only: a function body is not permitted in an
   * ambient module declaration.
   */
  export function clean(data: any): any;
}
declare module '@citation-js/core' {
export type OutputOptions = {
format: 'string';
Expand Down Expand Up @@ -38,13 +41,13 @@ declare module '@citation-js/core' {
} & Record<string, any>;

export class Cite {
constructor(input?: string | CSL);
constructor(input?: any);
agoose77 marked this conversation as resolved.
Show resolved Hide resolved

static async(data: string | Cite): Promise<Cite>;
static async(data: any): Promise<Cite>;

set(data: string | Cite): this;
set(data: any): this;

get: (opts: OutputOptions) => string;
format: (format: string, options: any) => string | object[];

data: CSL[];
}
Expand Down
35 changes: 0 additions & 35 deletions packages/myst-cli/src/build/utils/bibtex.spec.ts

This file was deleted.

30 changes: 1 addition & 29 deletions packages/myst-cli/src/build/utils/bibtex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,29 +5,6 @@ import type { ISession } from '../../session/types.js';
import { addWarningForFile } from '../../utils/addWarningForFile.js';
import type { References } from 'myst-common';

/**
 * Extract a single entry from the entire content of a bibtex file
 *
 * Looks for the pattern '@article{key' and then scans forward to the closing
 * bracket of that entry, returning the substring in between (inclusive).
 * The "article" prefix may be any alpha word.
 *
 * The key is matched literally (regex metacharacters such as `.` or `+` are
 * escaped) and must be the complete key: it has to be followed, after
 * optional whitespace, by `,` or `}`. Looking up `foo` therefore does not
 * return an entry whose key is `foobar`.
 *
 * @param key - citation key of the entry to extract
 * @param bibtex - full bibtex source text
 * @returns the entry text from `@` through its closing `}`, or `undefined`
 *   if the key is not found or the entry is never closed
 */
export function extractBibtex(key: string, bibtex: string): string | undefined {
  // Escape the key so it is treated as plain text inside the pattern.
  const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // The lookahead is zero-width, so the match still ends right after the key.
  const match = new RegExp(`@[a-zA-Z]*\\{${escapedKey}(?=\\s*[,}])`).exec(bibtex);
  if (!match) return undefined;
  const start = match.index;
  let bracketCount = 0;
  let ind = start + match[0].length;
  // Walk forward until a `}` is reached while no nested bracket is open.
  while (ind < bibtex.length && (bibtex[ind] !== '}' || bracketCount !== 0)) {
    // Brackets preceded by a backslash are escaped and do not affect nesting.
    if (bibtex[ind - 1] && bibtex[ind - 1] !== '\\') {
      if (bibtex[ind] === '{') bracketCount++;
      if (bibtex[ind] === '}') bracketCount--;
    }
    ind++;
  }
  // Running off the end means the entry was never closed.
  return ind < bibtex.length ? bibtex.substring(start, ind + 1) : undefined;
}

/**
* Write new bibtex file from citation renderer data and reference order
*
Expand All @@ -48,12 +25,7 @@ export function writeBibtexFromCitationRenderers(
const citationLookup: Record<string, string> = {};
Object.values(cache.$citationRenderers).forEach((renderers) => {
Object.entries(renderers).forEach(([key, renderer]) => {
const bibtexContent = (renderer.cite._graph as any[]).find((item) => {
return item.type === '@biblatex/text';
});
if (bibtexContent?.data) {
citationLookup[key] = extractBibtex(key, bibtexContent.data) ?? bibtexContent.data;
}
citationLookup[key] = renderer.exportBibTeX();
});
});
const bibtexContent: string[] = [];
Expand Down
10 changes: 7 additions & 3 deletions packages/myst-cli/src/process/citations.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
import fs from 'node:fs';
import type { CitationRenderer } from 'citation-js-utils';
import { getCitations } from 'citation-js-utils';
import { getCitationRenderers, parseBibTeX } from 'citation-js-utils';
import { tic, isUrl } from 'myst-cli-utils';
import { RuleId, plural } from 'myst-common';
import type { ISession, ISessionWithCache } from '../session/types.js';
import { castSession } from '../session/cache.js';
import { selectors } from '../store/index.js';
import { addWarningForFile } from '../utils/addWarningForFile.js';

export async function loadCitations(session: ISession, path: string): Promise<CitationRenderer> {
export async function loadBibTeXCitationRenderers(
session: ISession,
path: string,
): Promise<CitationRenderer> {
const toc = tic();
let data: string;
if (isUrl(path)) {
Expand All @@ -23,7 +26,8 @@ export async function loadCitations(session: ISession, path: string): Promise<Ci
session.log.debug(`Loading citations at "${path}"`);
data = fs.readFileSync(path).toString();
}
const renderer = await getCitations(data);
const csl = parseBibTeX(data);
const renderer = await getCitationRenderers(csl);
session.log.debug(toc(`Read ${plural('%s citations(s)', renderer)} from ${path} in %s.`));
return renderer;
}
Expand Down
Loading
Loading