Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expanding arXiv identification #89

Merged
merged 1 commit into from
Feb 9, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 21 additions & 6 deletions GooseBib/bibtex.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@ def _get_arxivid(entry: dict) -> str:
if "arxivid" in entry:
return entry["arxivid"]

if "eprint" in entry and entry.get("archiveprefix", "").lower() == "arxiv":
return entry["eprint"]

return recognise.arxivid(
*[val for key, val in entry.items() if key not in ["doi", "DISPLAY_ORDER", "INDENT"]]
)
Expand Down Expand Up @@ -294,7 +297,7 @@ def selection(use_bibtexparser: bool = False) -> dict:
if use_bibtexparser:
base += ["ID", "ENTRYTYPE", "DISPLAY_ORDER", "INDENT"]

base += ["author", "title", "year", "doi", "arxivid"]
base += ["author", "title", "year", "doi", "arxivid", "eprint", "archiveprefix"]
book = ["booktitle", "editor", "publisher", "volume", "pages"]

return dict(
Expand Down Expand Up @@ -328,10 +331,10 @@ def select(
If a list is specified all entry types are treated the same.

:param ensure_link:
Add URL to ``fields`` if no ``doi`` or ``arxivid`` is present.
Add URL to ``fields`` if no ``doi``, ``arxivid``, or ``eprint`` is present.

:param remove_url:
Remove URL when either a ``doi`` or an ``arxivid`` is present.
Remove URL when a ``doi``, an ``arxivid``, or an ``eprint`` is present.
"""

if fields is None:
Expand All @@ -349,15 +352,15 @@ def select(

if ensure_link:
if "url" not in select:
if "doi" not in entry and "arxivid" not in entry:
if "doi" not in entry and "arxivid" not in entry and "eprint" not in entry:
select.append("url")

rm = [key for key in entry if key not in select]
for key in rm:
del entry[key]

if remove_url:
if "url" in entry and ("doi" in entry or "arxivid" in entry):
if "url" in entry and ("doi" in entry or "arxivid" in entry or "eprint" in entry):
del entry["url"]

return data
Expand Down Expand Up @@ -718,6 +721,14 @@ def clean(
elif entry["doi"] == "10.48550/arXiv." + entry["arxivid"]:
del entry["arxivid"]

if "eprint" in entry and entry.get("archiveprefix", "").lower() == "arxiv":
if "doi" not in entry:
entry["doi"] = "10.48550/arXiv." + entry.pop("eprint")
del entry["archiveprefix"]
elif entry["doi"] == "10.48550/arXiv." + entry["eprint"]:
del entry["eprint"]
del entry["archiveprefix"]

# fix author abbreviations
if entry["ID"] not in no_abbreviate:
for key in ["author", "editor"]:
Expand Down Expand Up @@ -911,7 +922,9 @@ def format_journal_arxiv(

if "arxivid" in entry:
arxivid = entry["arxivid"]
elif "doi" in entry:
elif "eprint" in entry and entry.get("archiveprefix", "").lower() == "arxiv":
arxivid = entry["eprint"]
elif re.match(search, entry.get("doi", "None")):
arxivid = re.split(search, entry["doi"])
if len(arxivid) != 4:
continue
Expand Down Expand Up @@ -1554,6 +1567,8 @@ def dbsearch_arxiv(
iden = get_identifiers(entry)
if "arxivid" in iden:
continue
if "eprint" in iden and iden.get("archiveprefix", "").lower() == "arxiv":
continue
if "doi" not in iden:
continue
doi = iden["doi"]
Expand Down
Loading