Skip to content

Commit

Permalink
Expanding arXiv identification
Browse files Browse the repository at this point in the history
  • Loading branch information
tdegeus committed Feb 9, 2024
1 parent 5f0a95e commit ab07153
Showing 1 changed file with 21 additions and 6 deletions.
27 changes: 21 additions & 6 deletions GooseBib/bibtex.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@ def _get_arxivid(entry: dict) -> str:
if "arxivid" in entry:
return entry["arxivid"]

if "eprint" in entry and entry.get("archiveprefix", "").lower() == "arxiv":
return entry["eprint"]

return recognise.arxivid(
*[val for key, val in entry.items() if key not in ["doi", "DISPLAY_ORDER", "INDENT"]]
)
Expand Down Expand Up @@ -294,7 +297,7 @@ def selection(use_bibtexparser: bool = False) -> dict:
if use_bibtexparser:
base += ["ID", "ENTRYTYPE", "DISPLAY_ORDER", "INDENT"]

base += ["author", "title", "year", "doi", "arxivid"]
base += ["author", "title", "year", "doi", "arxivid", "eprint", "archiveprefix"]
book = ["booktitle", "editor", "publisher", "volume", "pages"]

return dict(
Expand Down Expand Up @@ -328,10 +331,10 @@ def select(
If a list is specified all entry types are treated the same.
:param ensure_link:
Add URL to ``fields`` if no ``doi`` or ``arxivid`` is present.
Add URL to ``fields`` if no ``doi``, ``arxivid``, or ``eprint`` is present.
:param remove_url:
Remove URL when either a ``doi`` or an ``arxivid`` is present.
Remove URL when a ``doi``, ``arxivid``, or ``eprint`` is present.
"""

if fields is None:
Expand All @@ -349,15 +352,15 @@ def select(

if ensure_link:
if "url" not in select:
if "doi" not in entry and "arxivid" not in entry:
if "doi" not in entry and "arxivid" not in entry and "eprint" not in entry:
select.append("url")

rm = [key for key in entry if key not in select]
for key in rm:
del entry[key]

if remove_url:
if "url" in entry and ("doi" in entry or "arxivid" in entry):
if "url" in entry and ("doi" in entry or "arxivid" in entry or "eprint" in entry):
del entry["url"]

return data
Expand Down Expand Up @@ -718,6 +721,14 @@ def clean(
elif entry["doi"] == "10.48550/arXiv." + entry["arxivid"]:
del entry["arxivid"]

if "eprint" in entry and entry.get("archiveprefix", "").lower() == "arxiv":
if "doi" not in entry:
entry["doi"] = "10.48550/arXiv." + entry.pop("eprint")
del entry["archiveprefix"]
elif entry["doi"] == "10.48550/arXiv." + entry["eprint"]:
del entry["eprint"]
del entry["archiveprefix"]

# fix author abbreviations
if entry["ID"] not in no_abbreviate:
for key in ["author", "editor"]:
Expand Down Expand Up @@ -911,7 +922,9 @@ def format_journal_arxiv(

if "arxivid" in entry:
arxivid = entry["arxivid"]
elif "doi" in entry:
elif "eprint" in entry and entry.get("archiveprefix", "").lower() == "arxiv":
arxivid = entry["eprint"]
elif re.match(search, entry.get("doi", "None")):
arxivid = re.split(search, entry["doi"])
if len(arxivid) != 4:
continue
Expand Down Expand Up @@ -1554,6 +1567,8 @@ def dbsearch_arxiv(
iden = get_identifiers(entry)
if "arxivid" in iden:
continue
if "eprint" in iden and iden.get("archiveprefix", "").lower() == "arxiv":
continue
if "doi" not in iden:
continue
doi = iden["doi"]
Expand Down

0 comments on commit ab07153

Please sign in to comment.