Skip to content

Commit

Permalink
wip: fix up citations
Browse files Browse the repository at this point in the history
  • Loading branch information
agoose77 committed Apr 8, 2024
1 parent e13e685 commit 23adc24
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 48 deletions.
53 changes: 34 additions & 19 deletions packages/citation-js-utils/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { OutputOptions } from '@citation-js/core';
import type { OutputOptions, CSL } from '@citation-js/core';
import { Cite } from '@citation-js/core';
import sanitizeHtml from 'sanitize-html';

Expand All @@ -11,7 +11,7 @@ const DOI_IN_TEXT = /(10.\d{4,9}\/[-._;()/:A-Z0-9]*[A-Z0-9])/i;
export type CitationJson = {
type?: 'article-journal' | string;
id: string;
author?: { given: string; family: string }[];
author?: { given: string; family: string, literal?: string }[];
issued?: { 'date-parts'?: number[][]; literal?: string };
publisher?: string;
title?: string;
Expand Down Expand Up @@ -47,14 +47,6 @@ function cleanRef(citation: string) {
return cleanHtml.replace(/^1\./g, '').replace(/&/g, '&').trim();
}

// eslint-disable-next-line @typescript-eslint/no-unused-vars
const defaultOpts: OutputOptions = {
format: 'string',
type: 'json',
style: 'ris',
lang: 'en-US',
};

export enum CitationJSStyles {
'apa' = 'citation-apa',
'vancouver' = 'citation-vancouver',
Expand All @@ -66,8 +58,7 @@ export enum InlineCite {
't' = 't',
}

const defaultString: OutputOptions = {
format: 'string',
const defaultOptions: Record<string, string> = {
lang: 'en-US',
type: 'html',
style: CitationJSStyles.apa,
Expand Down Expand Up @@ -112,7 +103,7 @@ export function getInlineCitation(data: CitationJson, kind: InlineCite, opts?: I
}
if (authors.length > 2) {
return [
{ type: 'text', value: `${prefix}${authors[0].family} ` },
{ type: 'text', value: `${prefix}${authors[0].family ?? authors[0].literal} ` },
{ type: 'emphasis', children: [{ type: 'text', value: 'et al.' }] },
{ type: 'text', value: `${yearPart}` },
];
Expand Down Expand Up @@ -171,12 +162,19 @@ export function firstNonDoiUrl(str?: string, doi?: string) {
return matches.map((match) => match[0]).find((match) => !doi || !match.includes(doi));
}

export async function getCitations(bibtex: string): Promise<CitationRenderer> {
const cite = new Cite();
const p = await Cite.async(bibtex);
/**
 * Extract the template name from a `citation-<template>` style identifier.
 *
 * Only the leading `citation-` prefix is stripped, so template names that
 * themselves contain hyphens are returned intact instead of being truncated
 * at their first hyphen.
 *
 * @param style - style identifier, e.g. `citation-apa`
 * @returns the text following the `citation-` prefix, e.g. `apa`
 * @throws Error when `style` lacks the `citation-` prefix or names no template
 */
function parseCitationStyle(style: string): string {
  const prefix = 'citation-';
  // Reject both a wrong/missing prefix and a bare prefix with nothing after it,
  // so the declared `string` return type is never violated.
  if (!style.startsWith(prefix) || style.length === prefix.length) {
    throw new Error(`unexpected citation style: ${style}`);
  }
  return style.slice(prefix.length);
}

export async function getCitationRenderers(data: any): Promise<CitationRenderer> {
const cite = new Cite();
const p = await Cite.async(data);
return Object.fromEntries(
p.data.map((c: any): [string, CitationRenderer[0]] => {
p.data.map((c: CSL): [string, CitationRenderer[0]] => {
const matchDoi = c.URL?.match(DOI_IN_TEXT) ?? c.note?.match(DOI_IN_TEXT);
if (!c.DOI && matchDoi) {
c.DOI = matchDoi[0];
Expand All @@ -189,15 +187,32 @@ export async function getCitations(bibtex: string): Promise<CitationRenderer> {
},
render(style?: CitationJSStyles) {
return replaceUrlsWithAnchorElement(
cleanRef(cite.set(c).get({ ...defaultString, style: style ?? CitationJSStyles.apa })),
cleanRef(
cite.set(c).format('bibliography', {
template: parseCitationStyle(style ?? (CitationJSStyles.apa as string)),
format: 'html',
lang: 'en-US',
}) as string,
),
c.DOI,
);
},
getDOI(): string | undefined {
return c.DOI || undefined;
},
getURL(): string | undefined {
return firstNonDoiUrl(cleanRef(cite.set(c).get(defaultString)), c.DOI) ?? doiUrl(c.DOI);
return (
firstNonDoiUrl(
cleanRef(
cite.set(c).format('bibliography', {
template: parseCitationStyle(CitationJSStyles.apa as string),
format: 'html',
lang: 'en-US',
}) as string,
),
c.DOI,
) ?? doiUrl(c.DOI)
);
},
cite: c,
},
Expand Down
8 changes: 4 additions & 4 deletions packages/citation-js-utils/types/citation-js/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,13 @@ declare module '@citation-js/core' {
} & Record<string, any>;

/**
 * Ambient typing for the `Cite` class exported by `@citation-js/core`.
 *
 * Input parameters are typed loosely because the signatures below accept
 * strings, CSL objects and other `Cite` instances.
 */
export class Cite {
  constructor(input?: string | CSL);
  constructor(input?: any);

  /** Asynchronously builds a `Cite` from the given input. */
  static async(data: string | Cite): Promise<Cite>;
  static async(data: any): Promise<Cite>;

  /** Sets the instance's data; returns the instance itself. */
  set(data: string | Cite): this;
  set(data: any): this;

  get: (opts: OutputOptions) => string;
  // `options` is left untyped (`any`), matching the other loosely typed
  // signatures in this class; the previous `Any` is not a declared type name.
  format: (format: string, options: any) => string | object[];

  /** CSL entries held by this instance. */
  data: CSL[];
}
Expand Down
6 changes: 3 additions & 3 deletions packages/myst-cli/src/process/citations.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import fs from 'node:fs';
import type { CitationRenderer } from 'citation-js-utils';
import { getCitations } from 'citation-js-utils';
import { getCitationRenderers } from 'citation-js-utils';
import { tic, isUrl } from 'myst-cli-utils';
import { RuleId, plural } from 'myst-common';
import type { ISession, ISessionWithCache } from '../session/types.js';
import { castSession } from '../session/cache.js';
import { selectors } from '../store/index.js';
import { addWarningForFile } from '../utils/addWarningForFile.js';

export async function loadCitations(session: ISession, path: string): Promise<CitationRenderer> {
export async function loadBibTeXCitationRenderers(session: ISession, path: string): Promise<CitationRenderer> {
const toc = tic();
let data: string;
if (isUrl(path)) {
Expand All @@ -23,7 +23,7 @@ export async function loadCitations(session: ISession, path: string): Promise<Ci
session.log.debug(`Loading citations at "${path}"`);
data = fs.readFileSync(path).toString();
}
const renderer = await getCitations(data);
const renderer = await getCitationRenderers(data);
session.log.debug(toc(`Read ${plural('%s citations(s)', renderer)} from ${path} in %s.`));
return renderer;
}
Expand Down
4 changes: 2 additions & 2 deletions packages/myst-cli/src/process/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import { warnings, watch } from '../store/reducers.js';
import type { PreRendererData, RendererData } from '../transforms/types.js';
import { logMessagesFromVFile } from '../utils/logging.js';
import { addWarningForFile } from '../utils/addWarningForFile.js';
import { loadCitations } from './citations.js';
import { loadBibTeXCitationRenderers } from './citations.js';
import { parseMyst } from './myst.js';
import { processNotebook } from './notebook.js';
import { includeDirectiveTransform } from 'myst-transforms';
Expand Down Expand Up @@ -107,7 +107,7 @@ export async function loadFile(
break;
}
case '.bib': {
const renderer = await loadCitations(session, file);
const renderer = await loadBibTeXCitationRenderers(session, file);
cache.$citationRenderers[file] = renderer;
break;
}
Expand Down
60 changes: 40 additions & 20 deletions packages/myst-cli/src/transforms/dois.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import fs from 'node:fs';
import { join } from 'node:path';
import type { CitationRenderer } from 'citation-js-utils';
import { getCitations } from 'citation-js-utils';
import type { CitationRenderer, CitationJson } from 'citation-js-utils';
import { getCitationRenderers } from 'citation-js-utils';
import { doi } from 'doi-utils';
import type { Link } from 'myst-spec';
import type { GenericNode, GenericParent } from 'myst-common';
Expand All @@ -27,27 +27,43 @@ function doiResolvesCacheFile(session: ISession, normalizedDoi: string) {
return join(cacheFolder, filename);
}

const DEFAULT_DOI_CONTENT = 'application/vnd.citationstyles.csl+json';

/**
 * Rename the `name` field of author entries to `literal` in a CSL JSON record.
 *
 * Author entries without a `name` field are passed through unchanged, as are
 * records whose `author` field is missing or is not an array. The input is
 * never mutated: a patched record is a new object with new author objects.
 *
 * @param data - CSL JSON record (untyped, as parsed from a response body)
 * @returns a patched copy when `author` is an array, otherwise `data` itself
 */
function patchCrossCiteCSL(data: any) {
  const patchAuthor = (author: any) => {
    // Null or primitive entries, and entries without `name`, need no patching.
    if (author === null || typeof author !== 'object' || author.name === undefined) {
      return author;
    }
    const { name, ...rest } = author;
    // `rest` is spread last so an existing `literal` on the entry is kept.
    return { literal: name, ...rest };
  };

  // The record comes from an external service: do not assume `author` exists
  // or is an array; anything else is returned untouched instead of throwing.
  if (data === null || typeof data !== 'object' || !Array.isArray(data.author)) {
    return data;
  }
  const { author, ...rest } = data;
  return { author: author.map(patchAuthor), ...rest };
}

/**
* Fetch CSL JSON entry for doi from doi.org using application/vnd.citationstyles.csl+json accept header
*/
export async function getDoiOrgBibtex(
session: ISession,
doiString: string,
): Promise<string | null> {
export async function getDoiOrgJSON(session: ISession, doiString: string): Promise<CitationJson | null> {
const normalizedDoi = doi.normalize(doiString);
const url = doi.buildUrl(normalizedDoi);
if (!doi.validate(doiString) || !normalizedDoi || !url) return null;
const cachePath = doiBibtexCacheFile(session, normalizedDoi);

if (fs.existsSync(cachePath)) {
const bibtex = fs.readFileSync(cachePath).toString();
session.log.debug(`Loaded cached reference bibtex for doi:${normalizedDoi}`);
return bibtex;
const cached = fs.readFileSync(cachePath).toString();
session.log.debug(`Loaded cached reference CSL JSON for doi:${normalizedDoi}`);
return JSON.parse(cached);
}
const toc = tic();
session.log.debug('Fetching DOI bibtex from doi.org');
session.log.debug('Fetching DOI CSL JSON from doi.org');
const response = await session
.fetch(url, {
headers: [['Accept', 'application/x-bibtex']],
headers: [['Accept', DEFAULT_DOI_CONTENT]],
})
.catch(() => {
session.log.debug(`Request to ${url} failed.`);
Expand All @@ -57,11 +73,15 @@ export async function getDoiOrgBibtex(
session.log.debug(`doi.org fetch failed for ${doiString}`);
return null;
}
const bibtex = await response.text();
session.log.debug(toc(`Fetched reference bibtex for doi:${normalizedDoi} in %s`));
session.log.debug(`Saving doi bibtex to cache ${cachePath}`);
fs.writeFileSync(cachePath, bibtex);
return bibtex;
let data = (await response.json()) as object;

// Some CSL publishers don't seem to conform
data = patchCrossCiteCSL(data);

session.log.debug(toc(`Fetched reference CSL JSON for doi:${normalizedDoi} in %s`));
session.log.debug(`Saving DOI CSL JSON to cache ${cachePath}`);
fs.writeFileSync(cachePath, JSON.stringify(data));
return data as unknown as CitationJson;
}

/**
Expand Down Expand Up @@ -99,14 +119,14 @@ export async function getCitation(
node: GenericNode,
): Promise<SingleCitationRenderer | null> {
if (!doi.validate(doiString)) return null;
const bibtex = await getDoiOrgBibtex(session, doiString);
if (!bibtex) {
const data = await getDoiOrgJSON(session, doiString);
if (!data) {
const resolves = await doiOrgResolves(session, doiString);
const normalizedDoi = doi.normalize(doiString);
let message: string;
let note: string | undefined;
if (resolves) {
message = `No bibtex available from doi.org for doi:${normalizedDoi}`;
message = `No CSL JSON available from doi.org for doi:${normalizedDoi}`;
note = `To resolve this error, visit ${doi.buildUrl(normalizedDoi)} and add citation info to .bib file`;
} else {
message = `Could not find DOI from link: ${doiString} as ${normalizedDoi}`;
Expand All @@ -118,7 +138,7 @@ export async function getCitation(
});
return null;
}
const renderer = await getCitations(bibtex);
const renderer = await getCitationRenderers(data);
const id = Object.keys(renderer)[0];
const render = renderer[id];
return { id, render };
Expand Down

0 comments on commit 23adc24

Please sign in to comment.