From 23964b86935b1fcd65b8a1af4b5581a70b641239 Mon Sep 17 00:00:00 2001
From: Diego <d.alonso-alvarez@imperial.ac.uk>
Date: Thu, 3 Sep 2020 16:10:32 +0100
Subject: [PATCH 01/13] Add package to FunctionReference

---
 r2t2/core.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/r2t2/core.py b/r2t2/core.py
index 210857d..a1cb60e 100755
--- a/r2t2/core.py
+++ b/r2t2/core.py
@@ -8,6 +8,7 @@ class FunctionReference(NamedTuple):
     name: str
     line: int
     source: str
+    package: str
     short_purpose: List[str]
     references: List[str]
 
@@ -80,13 +81,14 @@ def wrapper(wrapped, instance, args, kwargs):
         source = inspect.getsourcefile(wrapped)
         line = inspect.getsourcelines(wrapped)[1]
         identifier = f"{source}:{line}"
+        package = inspect.getmodule(inspect.stack()[1][0]).__name__.split(".")[0]
 
         if identifier in BIBLIOGRAPHY and ref in BIBLIOGRAPHY[identifier].references:
             return wrapped(*args, **kwargs)
 
         if identifier not in BIBLIOGRAPHY:
             BIBLIOGRAPHY[identifier] = FunctionReference(wrapped.__name__, line, source,
-                                                         [], [])
+                                                         package, [], [])
 
         BIBLIOGRAPHY[identifier].short_purpose.append(short_purpose)
         BIBLIOGRAPHY[identifier].references.append(ref)

From 9dde01a4ebceafcd339156cd37f02110331e2935 Mon Sep 17 00:00:00 2001
From: Diego <d.alonso-alvarez@imperial.ac.uk>
Date: Thu, 3 Sep 2020 16:48:16 +0100
Subject: [PATCH 02/13] Add load of reference sources

---
 r2t2/core.py | 9 +++++++++
 setup.py     | 2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/r2t2/core.py b/r2t2/core.py
index 98448ca..810a180 100755
--- a/r2t2/core.py
+++ b/r2t2/core.py
@@ -4,6 +4,8 @@
 from functools import reduce
 from pathlib import Path
 
+import bibtexparser as bp
+
 
 class FunctionReference(NamedTuple):
     name: str
@@ -20,6 +22,7 @@ class Biblio(dict):
     def __init__(self):
         super().__init__()
         self._sources: Dict[str, Path] = {}
+        self._sources_loaded: Dict[str, bp.bibdatabase.BibDatabase] = {}
 
     def __str__(self):
         def add_record(out, record):
@@ -38,6 +41,7 @@ def add_record(out, record):
     def clear(self) -> None:
         super().clear()
         self._sources.clear()
+        self._sources_loaded.clear()
 
     @property
     def references(self):
@@ -79,6 +83,11 @@ def add_source(self, source: Union[str, Path]) -> None:
             )
         self._sources[package] = src
 
+    def load_source(self, package: str) -> None:
+        """Open the source for the given package."""
+        with self._sources[package].open("w+") as f:
+            self._sources_loaded[package] = bp.load(f)
+
 
 BIBLIOGRAPHY: Biblio = Biblio()
 
diff --git a/setup.py b/setup.py
index 3df903a..58b7cdc 100644
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,7 @@
     author="Research Computing Service, Imperial College London",
     author_email="rcs-support@imperial.ac.uk",
     url="https://github.com/ImperialCollegeLondon/R2T2",
-    install_requires=["wrapt"],
+    install_requires=["wrapt", "bibtexparser"],
     tests_require=tests_require,
     extras_require={"dev": tests_require + precommit},
     packages=find_packages("."),

From a44a5c8446ca4b63287bd782701c64c5e095f3d7 Mon Sep 17 00:00:00 2001
From: Diego <d.alonso-alvarez@imperial.ac.uk>
Date: Thu, 3 Sep 2020 17:06:11 +0100
Subject: [PATCH 03/13] Add test for load source

---
 tests/test_core.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 5733a3b..7583203 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -86,7 +86,6 @@ def test_add_reference_from_doi(self, bib_with_tracking, decorated_with_doi):
 
 
 class TestAddSource:
-
     def test_add_source_exception_if_not_bibtex(self, bibliography, tmp_path):
         source = tmp_path / "my_source"
         with raises(ValueError):
@@ -110,3 +109,23 @@ def test_add_source(self, bibliography, tmp_path):
         bibliography.add_source(source)
         assert "tests" in bibliography._sources
         assert bibliography._sources["tests"] == source
+
+
+class TestLoadSource:
+
+    def test_load_source(self, bibliography, tmp_path):
+        source = tmp_path / "my_source.bib"
+        ref = """@misc{sulzer_marquis_timms_robinson_chapman_2020,
+ title={Python Battery Mathematical Modelling (PyBaMM)},
+ DOI={10.1149/osf.io/67ckj},
+ publisher={ECSarXiv},
+ author={Sulzer, Valentin and Marquis, Scott G and Timms, Robert and Robinson, Martin and Chapman, S. J},    # noqa: E501
+ year={2020},
+}
+}"""
+        with source.open("w") as f:
+            f.write(ref)
+
+        bibliography.add_source(source)
+        bibliography.load_source("tests")
+        assert "tests" in bibliography._sources_loaded

From 6c4e2301dee632b5214effda0f6b2be519a8763a Mon Sep 17 00:00:00 2001
From: Diego <d.alonso-alvarez@imperial.ac.uk>
Date: Thu, 3 Sep 2020 18:47:02 +0100
Subject: [PATCH 04/13] Add load and save source

---
 r2t2/core.py       |  7 ++++++-
 tests/test_core.py | 20 +++++++++++++-------
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/r2t2/core.py b/r2t2/core.py
index 810a180..9200e2a 100755
--- a/r2t2/core.py
+++ b/r2t2/core.py
@@ -85,9 +85,14 @@ def add_source(self, source: Union[str, Path]) -> None:
 
     def load_source(self, package: str) -> None:
         """Open the source for the given package."""
-        with self._sources[package].open("w+") as f:
+        with self._sources[package].open() as f:
             self._sources_loaded[package] = bp.load(f)
 
+    def save_source(self, package: str) -> None:
+        """Save the (possibly modified) source for the given package."""
+        with self._sources[package].open("w") as f:
+            bp.dump(self._sources_loaded[package], f)
+
 
 BIBLIOGRAPHY: Biblio = Biblio()
 
diff --git a/tests/test_core.py b/tests/test_core.py
index 7583203..5e98ab4 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -115,17 +115,23 @@ class TestLoadSource:
 
     def test_load_source(self, bibliography, tmp_path):
         source = tmp_path / "my_source.bib"
-        ref = """@misc{sulzer_marquis_timms_robinson_chapman_2020,
- title={Python Battery Mathematical Modelling (PyBaMM)},
- DOI={10.1149/osf.io/67ckj},
- publisher={ECSarXiv},
- author={Sulzer, Valentin and Marquis, Scott G and Timms, Robert and Robinson, Martin and Chapman, S. J},    # noqa: E501
- year={2020},
+        ref = """@article{Sulzer_2019,
+    doi = {10.1149/2.0441908jes},
+    url = {https://doi.org/10.1149%2F2.0441908jes},
+    year = 2019,
+    publisher = {The Electrochemical Society},
+    volume = {166},
+    number = {12},
+    pages = {A2372--A2382},
+    author = {Valentin Sulzer et al.},
+    title = {Faster Lead-Acid Battery Simulations from Porous-Electrode Theory},
+    journal = {Journal of The Electrochemical Society},
 }
-}"""
+"""
         with source.open("w") as f:
             f.write(ref)
 
         bibliography.add_source(source)
         bibliography.load_source("tests")
         assert "tests" in bibliography._sources_loaded
+        assert "Sulzer_2019" in bibliography._sources_loaded["tests"].entries_dict

From 0b58cfe906e771a0b7a5bb7d580a7f4120cfc5e5 Mon Sep 17 00:00:00 2001
From: Diego <d.alonso-alvarez@imperial.ac.uk>
Date: Thu, 3 Sep 2020 19:34:09 +0100
Subject: [PATCH 05/13] Add package attribute awareness to the parser

---
 docs/conf.py                             |  19 ++--
 docs/examples/minimal-class.py           |  12 ++-
 docs/examples/minimal-method.py          |  13 +--
 docs/examples/minimal.py                 |  10 +-
 r2t2/__main__.py                         |  33 +++---
 r2t2/core.py                             |  10 +-
 r2t2/docstring_parser.py                 |  34 +++---
 r2t2/docstring_reference_parser.py       |  27 ++---
 r2t2/plain_text_parser.py                |  12 +--
 r2t2/runtime_tracker.py                  |   3 +-
 r2t2/static_parser.py                    |   8 +-
 tests/conftest.py                        |   6 +-
 tests/test__main__.py                    |  59 +++--------
 tests/test_core.py                       |   1 -
 tests/test_docstring_parser.py           | 125 +++++++++++-----------
 tests/test_docstring_reference_parser.py | 129 +++++++++--------------
 tests/test_plain_text_parser.py          |  62 +++++------
 17 files changed, 246 insertions(+), 317 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 2072d51..9c7335e 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -17,12 +17,12 @@
 
 # -- Project information -----------------------------------------------------
 
-project = 'Research References Tracking Tool (R2T2)'
-copyright = '2020, Research Computing Service, Imperial College London'
-author = 'Research Computing Service, Imperial College London'
+project = "Research References Tracking Tool (R2T2)"
+copyright = "2020, Research Computing Service, Imperial College London"
+author = "Research Computing Service, Imperial College London"
 
 # The full version, including alpha/beta/rc tags
-release = '0.3.1'
+release = "0.3.1"
 
 
 # -- General configuration ---------------------------------------------------
@@ -30,16 +30,15 @@
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = [
-]
+extensions = []
 
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
 
 # The master toctree document.
 master_doc = "index"
@@ -49,9 +48,9 @@
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 #
-html_theme = 'alabaster'
+html_theme = "alabaster"
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
diff --git a/docs/examples/minimal-class.py b/docs/examples/minimal-class.py
index 420c733..2761c96 100644
--- a/docs/examples/minimal-class.py
+++ b/docs/examples/minimal-class.py
@@ -1,9 +1,11 @@
 from r2t2 import add_reference
 
 
-@add_reference(short_purpose="Original implementation of R2T2",
-               reference="Diego Alonso-Álvarez, et al."
-               "(2018, February 27). Solcore (Version 5.1.0). Zenodo."
-               "http://doi.org/10.5281/zenodo.1185316")
-class MyGreatClass():
+@add_reference(
+    short_purpose="Original implementation of R2T2",
+    reference="Diego Alonso-Álvarez, et al."
+    "(2018, February 27). Solcore (Version 5.1.0). Zenodo."
+    "http://doi.org/10.5281/zenodo.1185316",
+)
+class MyGreatClass:
     pass
diff --git a/docs/examples/minimal-method.py b/docs/examples/minimal-method.py
index 7198662..74c4704 100644
--- a/docs/examples/minimal-method.py
+++ b/docs/examples/minimal-method.py
@@ -1,11 +1,12 @@
 from r2t2 import add_reference
 
 
-class MyGreatClass():
-
-    @add_reference(short_purpose="Original implementation of R2T2",
-                   reference="Diego Alonso-Álvarez, et al."
-                   "(2018, February 27). Solcore (Version 5.1.0). Zenodo."
-                   "http://doi.org/10.5281/zenodo.1185316")
+class MyGreatClass:
+    @add_reference(
+        short_purpose="Original implementation of R2T2",
+        reference="Diego Alonso-Álvarez, et al."
+        "(2018, February 27). Solcore (Version 5.1.0). Zenodo."
+        "http://doi.org/10.5281/zenodo.1185316",
+    )
     def my_great_function(self):
         pass
diff --git a/docs/examples/minimal.py b/docs/examples/minimal.py
index f60fff8..cd08301 100644
--- a/docs/examples/minimal.py
+++ b/docs/examples/minimal.py
@@ -1,10 +1,12 @@
 from r2t2 import add_reference
 
 
-@add_reference(short_purpose="Original implementation of R2T2",
-               reference="Diego Alonso-Álvarez, et al."
-               "(2018, February 27). Solcore (Version 5.1.0). Zenodo."
-               "http://doi.org/10.5281/zenodo.1185316")
+@add_reference(
+    short_purpose="Original implementation of R2T2",
+    reference="Diego Alonso-Álvarez, et al."
+    "(2018, February 27). Solcore (Version 5.1.0). Zenodo."
+    "http://doi.org/10.5281/zenodo.1185316",
+)
 def my_great_function():
     pass
 
diff --git a/r2t2/__main__.py b/r2t2/__main__.py
index c92f206..090fc06 100644
--- a/r2t2/__main__.py
+++ b/r2t2/__main__.py
@@ -10,7 +10,7 @@
 from .writers import REGISTERED_WRITERS
 from .docstring_reference_parser import (
     expand_file_list,
-    parse_and_add_docstring_references_from_files
+    parse_and_add_docstring_references_from_files,
 )
 
 
@@ -38,7 +38,7 @@ def add_common_arguments(parser: argparse.ArgumentParser):
         default="terminal",
         type=str,
         choices=sorted(REGISTERED_WRITERS.keys()),
-        help="Format of the output. Default: Terminal."
+        help="Format of the output. Default: Terminal.",
     )
     parser.add_argument(
         "--encoding",
@@ -53,11 +53,7 @@ def add_common_arguments(parser: argparse.ArgumentParser):
         help="File to save the references into. Ignored if format is 'Terminal'."
         " Default: [target folder]/references.",
     )
-    parser.add_argument(
-        "--debug",
-        action="store_true",
-        help="Enable debug logging"
-    )
+    parser.add_argument("--debug", action="store_true", help="Enable debug logging")
 
 
 class RunSubCommand(SubCommand):
@@ -102,27 +98,24 @@ def add_arguments(self, parser: argparse.ArgumentParser):
             "target",
             default=".",
             type=str,
-            help="Target file or folder to analyse."
-            " Default: Current directory.",
+            help="Target file or folder to analyse." " Default: Current directory.",
         )
 
     def run(self, args: argparse.Namespace):
         if args.notebook:
-            if not args.target.endswith('.ipynb'):
-                raise Exception("If --notebook flag is passed, target must be a"
-                                " Jupyter notebook!")
+            if not args.target.endswith(".ipynb"):
+                raise Exception(
+                    "If --notebook flag is passed, target must be a"
+                    " Jupyter notebook!"
+                )
         locate_references(args.target, encoding=args.encoding)
         if args.docstring or args.notebook:
             parse_and_add_docstring_references_from_files(
-                expand_file_list(args.target),
-                encoding=args.encoding
+                expand_file_list(args.target), encoding=args.encoding
             )
 
 
-SUB_COMMANDS: List[SubCommand] = [
-    RunSubCommand(),
-    StaticSubCommand()
-]
+SUB_COMMANDS: List[SubCommand] = [RunSubCommand(), StaticSubCommand()]
 
 SUB_COMMAND_BY_NAME: Dict[str, SubCommand] = {
     sub_command.name: sub_command for sub_command in SUB_COMMANDS
@@ -169,6 +162,6 @@ def main(argv: List[str] = None):
     run(args)
 
 
-if __name__ == '__main__':
-    logging.basicConfig(level='INFO')
+if __name__ == "__main__":
+    logging.basicConfig(level="INFO")
     main()
diff --git a/r2t2/core.py b/r2t2/core.py
index 25d6028..34b827b 100755
--- a/r2t2/core.py
+++ b/r2t2/core.py
@@ -117,14 +117,18 @@ def wrapper(wrapped, instance, args, kwargs):
         source = inspect.getsourcefile(wrapped)
         line = inspect.getsourcelines(wrapped)[1]
         identifier = f"{source}:{line}"
-        package = inspect.getmodule(inspect.stack()[1][0]).__name__.split(".")[0]
+        try:
+            package = inspect.getmodule(inspect.stack()[1][0]).__name__.split(".")[0]
+        except AttributeError:
+            package = ""
 
         if identifier in BIBLIOGRAPHY and ref in BIBLIOGRAPHY[identifier].references:
             return wrapped(*args, **kwargs)
 
         if identifier not in BIBLIOGRAPHY:
-            BIBLIOGRAPHY[identifier] = FunctionReference(wrapped.__name__, line, source,
-                                                         package, [], [])
+            BIBLIOGRAPHY[identifier] = FunctionReference(
+                wrapped.__name__, line, source, package, [], []
+            )
 
         BIBLIOGRAPHY[identifier].short_purpose.append(short_purpose)
         BIBLIOGRAPHY[identifier].references.append(ref)
diff --git a/r2t2/docstring_parser.py b/r2t2/docstring_parser.py
index 323e3e5..14fcabb 100644
--- a/r2t2/docstring_parser.py
+++ b/r2t2/docstring_parser.py
@@ -13,7 +13,7 @@
 """
 
 
-DEFAULT_ENCODING = 'utf-8'
+DEFAULT_ENCODING = "utf-8"
 
 
 class CodeDocumentComment(NamedTuple):
@@ -21,28 +21,31 @@ class CodeDocumentComment(NamedTuple):
     filename: Optional[str] = None
     lineno: Optional[int] = None
     name: Optional[str] = None
+    package: Optional[str] = None
 
 
 def iter_extract_docstring_from_text(
-    text: str, filename: str = None,
+    text: str,
+    filename: str = None,
     notebook: bool = False,
 ) -> Iterable[CodeDocumentComment]:
-    tree = ast.parse(text, filename=filename or '<unknown>')
+    tree = ast.parse(text, filename=filename or "<unknown>")
     for node in ast.walk(tree):
-        LOGGER.debug('node: %r', node)
+        LOGGER.debug("node: %r", node)
         try:
             node_docstring = ast.get_docstring(node)
-            LOGGER.debug('node_docstring: %r', node_docstring)
+            LOGGER.debug("node_docstring: %r", node_docstring)
             if node_docstring:
                 if notebook:
-                    lineno = 'n/a'
+                    lineno = "n/a"
                 else:
-                    lineno = getattr(node, 'lineno', 1)
+                    lineno = getattr(node, "lineno", 1)
                 yield CodeDocumentComment(
                     filename=filename,
                     lineno=lineno,
-                    name=getattr(node, 'name', None),
-                    text=node_docstring
+                    name=getattr(node, "name", None),
+                    text=node_docstring,
+                    package="",
                 )
         except TypeError:
             # node type may not be able to have docstrings
@@ -50,14 +53,13 @@ def iter_extract_docstring_from_text(
 
 
 def iter_extract_docstring_from_lines(
-    lines: Iterable[str]
+    lines: Iterable[str],
 ) -> Iterable[CodeDocumentComment]:
-    return iter_extract_docstring_from_text('\n'.join(lines))
+    return iter_extract_docstring_from_text("\n".join(lines))
 
 
 def iter_extract_docstring_from_file(
-    path: Union[str, Path],
-    encoding: str = DEFAULT_ENCODING
+    path: Union[str, Path], encoding: str = DEFAULT_ENCODING
 ) -> Iterable[CodeDocumentComment]:
     path = Path(path)
     txt = path.read_text(encoding=encoding)
@@ -72,13 +74,11 @@ def iter_extract_docstring_from_file(
                 txt.append(FAKE_FUNC.format(i, "    ".join(c["source"])))
         txt = "\n".join(txt)
         notebook = True
-    return iter_extract_docstring_from_text(txt, filename=str(path),
-                                            notebook=notebook)
+    return iter_extract_docstring_from_text(txt, filename=str(path), notebook=notebook)
 
 
 def iter_extract_docstring_from_files(
-    paths: Iterable[Union[str, Path]],
-    **kwargs
+    paths: Iterable[Union[str, Path]], **kwargs
 ) -> Iterable[CodeDocumentComment]:
     for path in paths:
         yield from iter_extract_docstring_from_file(path, **kwargs)
diff --git a/r2t2/docstring_reference_parser.py b/r2t2/docstring_reference_parser.py
index 03dcc0b..321e39b 100644
--- a/r2t2/docstring_reference_parser.py
+++ b/r2t2/docstring_reference_parser.py
@@ -5,17 +5,14 @@
 
 from r2t2.core import Biblio, BIBLIOGRAPHY, FunctionReference
 from r2t2.plain_text_parser import iter_parse_plain_text_references
-from r2t2.docstring_parser import (
-    CodeDocumentComment,
-    iter_extract_docstring_from_files
-)
+from r2t2.docstring_parser import CodeDocumentComment, iter_extract_docstring_from_files
 
 
 LOGGER = logging.getLogger(__name__)
 
 
-DOCSTRING_SHORT_PURPOSE = 'automatically parsed from docstring'
-NOTEBOOK_SHORT_PURPOSE = 'automatically parsed from markdown cell'
+DOCSTRING_SHORT_PURPOSE = "automatically parsed from docstring"
+NOTEBOOK_SHORT_PURPOSE = "automatically parsed from markdown cell"
 
 
 def expand_file_list(path: Union[Path, str]) -> List[Path]:
@@ -34,25 +31,25 @@ def get_function_reference_identifier(function_reference: FunctionReference) ->
 
 
 def get_function_reference_from_docstring(
-    docstring: CodeDocumentComment
+    docstring: CodeDocumentComment,
 ) -> FunctionReference:
     references = list(iter_parse_plain_text_references(docstring.text))
-    if docstring.lineno != 'n/a':
+    if docstring.lineno != "n/a":
         purpose = DOCSTRING_SHORT_PURPOSE
     else:
         purpose = NOTEBOOK_SHORT_PURPOSE
     return FunctionReference(
-        source=docstring.filename or '',
+        source=docstring.filename or "",
         line=docstring.lineno or 0,
-        name=docstring.name or '',
+        package=docstring.package or "",
+        name=docstring.name or "",
         references=references,
-        short_purpose=[purpose] * len(references)
+        short_purpose=[purpose] * len(references),
     )
 
 
 def iter_parse_docstring_function_references_from_files(
-    filenames: Iterable[Union[str, Path]],
-    **kwargs
+    filenames: Iterable[Union[str, Path]], **kwargs
 ) -> Iterable[Tuple[str, FunctionReference]]:
     for docstring in iter_extract_docstring_from_files(filenames, **kwargs):
         function_reference = get_function_reference_from_docstring(docstring)
@@ -62,9 +59,7 @@ def iter_parse_docstring_function_references_from_files(
 
 
 def parse_and_add_docstring_references_from_files(
-    filenames: Iterable[Union[str, Path]],
-    biblio: Biblio = None,
-    **kwargs
+    filenames: Iterable[Union[str, Path]], biblio: Biblio = None, **kwargs
 ):
     if biblio is None:
         biblio = BIBLIOGRAPHY
diff --git a/r2t2/plain_text_parser.py b/r2t2/plain_text_parser.py
index 348bf6d..9434335 100644
--- a/r2t2/plain_text_parser.py
+++ b/r2t2/plain_text_parser.py
@@ -3,26 +3,26 @@
 from typing import Iterable, List
 
 
-DOI_URL_HTTPS_PREFIX = 'https://doi.org/'
+DOI_URL_HTTPS_PREFIX = "https://doi.org/"
 
 
 def iter_doi(text: str) -> Iterable[str]:
-    for m in re.findall(r'\b10\.\d{4,}/\S+', text):
+    for m in re.findall(r"\b10\.\d{4,}/\S+", text):
         yield DOI_URL_HTTPS_PREFIX + str(m)
 
 
 def iter_sphinx_reference_names(text: str) -> Iterable[str]:
-    for m in re.finditer(r':cite:`([^`]+)`', text):
+    for m in re.finditer(r":cite:`([^`]+)`", text):
         yield m.group(1)
 
 
 def iter_latex_reference_names(text: str) -> Iterable[str]:
-    for m in re.finditer(r'\\cite(?:\[[^\]]*\])?{([^}]+)}', text):
+    for m in re.finditer(r"\\cite(?:\[[^\]]*\])?{([^}]+)}", text):
         yield m.group(1)
 
 
 def iter_doxygen_reference_names(text: str) -> Iterable[str]:
-    for m in re.finditer(r'\\cite\s(\S+)', text):
+    for m in re.finditer(r"\\cite\s(\S+)", text):
         yield m.group(1)
 
 
@@ -33,7 +33,7 @@ def iter_parse_plain_text_raw_bib_references(text: str) -> Iterable[str]:
 
 def iter_parse_plain_text_bib_references(text: str) -> Iterable[str]:
     for raw_reference in iter_parse_plain_text_raw_bib_references(text):
-        for ref_name in raw_reference.split(','):
+        for ref_name in raw_reference.split(","):
             yield ref_name.strip()
 
 
diff --git a/r2t2/runtime_tracker.py b/r2t2/runtime_tracker.py
index 34f6bf2..f8c80c7 100644
--- a/r2t2/runtime_tracker.py
+++ b/r2t2/runtime_tracker.py
@@ -17,8 +17,7 @@ def runtime_tracker(script: str, args: List[str], encoding: str):
 
     try:
         LOGGER.debug(
-            "loading script: %s (args: %s, encoding: %s)",
-            script, args, encoding
+            "loading script: %s (args: %s, encoding: %s)", script, args, encoding
         )
         with open(script, encoding=encoding) as fp:
             code = compile(fp.read(), script, "exec")
diff --git a/r2t2/static_parser.py b/r2t2/static_parser.py
index 4071ea0..1b71e3b 100644
--- a/r2t2/static_parser.py
+++ b/r2t2/static_parser.py
@@ -15,7 +15,7 @@ class FileReferenceParseError(FileParseError):
     pass
 
 
-DEFAULT_ENCODING = 'utf-8'
+DEFAULT_ENCODING = "utf-8"
 
 
 def locate_references(path: Union[Path, str], encoding: str = DEFAULT_ENCODING):
@@ -65,7 +65,7 @@ def locate_references_in_file(filename: Union[Path, str], encoding: str):
         raise
     except Exception as exc:
         raise FileParseError(
-            'failed to process %s due to %r' % (filename, exc)
+            "failed to process %s due to %r" % (filename, exc)
         ) from exc
 
 
@@ -81,7 +81,7 @@ def parse_references(
     try:
         name = re.findall(r"[\w']+", current)[1]
 
-        BIBLIOGRAPHY[identifier] = FunctionReference(name, line_num, source, [], [])
+        BIBLIOGRAPHY[identifier] = FunctionReference(name, line_num, source, "", [], [])
 
         def add_ref(i, j):
             one_ref = " ".join(ref_raw[i:j]).replace("@", "_")
@@ -93,5 +93,5 @@ def add_ref(i, j):
         reduce(add_ref, ref_lines)
     except Exception as exc:
         raise FileReferenceParseError(
-            'failed to process %s due to %r' % (identifier, exc)
+            "failed to process %s due to %r" % (identifier, exc)
         ) from exc
diff --git a/tests/conftest.py b/tests/conftest.py
index fbb1f90..7f0c3e7 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -45,10 +45,10 @@ def a_great_function():
     return a_great_function
 
 
-@pytest.fixture(scope='session', autouse=True)
+@pytest.fixture(scope="session", autouse=True)
 def setup_logging():
-    for name in {'r2t2', 'tests'}:
-        logging.getLogger(name).setLevel('DEBUG')
+    for name in {"r2t2", "tests"}:
+        logging.getLogger(name).setLevel("DEBUG")
 
 
 @pytest.fixture()
diff --git a/tests/test__main__.py b/tests/test__main__.py
index 0e28594..ef76380 100644
--- a/tests/test__main__.py
+++ b/tests/test__main__.py
@@ -1,69 +1,36 @@
 import pytest
 
-from r2t2.__main__ import (
-    parse_args,
-    main
-)
+from r2t2.__main__ import parse_args, main
 
 
-VALID_FORMAT = 'markdown'
-INVALID_FORMAT = 'other'
+VALID_FORMAT = "markdown"
+INVALID_FORMAT = "other"
 
 
 class TestParseArgs:
     def test_should_not_fail_on_valid_format(self):
-        args = parse_args([
-            'run',
-            '--format=%s' % VALID_FORMAT,
-            'docs/examples'
-        ])
+        args = parse_args(["run", "--format=%s" % VALID_FORMAT, "docs/examples"])
         assert args.format == VALID_FORMAT
 
     def test_should_fail_on_invalid_format(self):
         with pytest.raises(SystemExit):
-            parse_args([
-                'run',
-                '--format=%s' % INVALID_FORMAT,
-                'docs/examples'
-            ])
+            parse_args(["run", "--format=%s" % INVALID_FORMAT, "docs/examples"])
 
 
-@pytest.mark.usefixtures('bibliography')
+@pytest.mark.usefixtures("bibliography")
 class TestMain:
-    def test_should_not_fail_on_static_analysis_of_examples_including_docstrings(
-        self
-    ):
-        main([
-            'static',
-            '--docstring',
-            'docs/examples'
-        ])
+    def test_should_not_fail_on_static_analysis_of_examples_including_docstrings(self):
+        main(["static", "--docstring", "docs/examples"])
 
-    def test_should_not_fail_on_static_analysis_of_examples_ignoring_docstrings(
-        self
-    ):
-        main([
-            'static',
-            'docs/examples'
-        ])
+    def test_should_not_fail_on_static_analysis_of_examples_ignoring_docstrings(self):
+        main(["static", "docs/examples"])
 
     def test_should_not_fail_on_runtime_analysis_of_examples(self):
-        main([
-            'run',
-            'docs/examples/minimal.py'
-        ])
+        main(["run", "docs/examples/minimal.py"])
 
     def test_should_not_fail_on_notebook(self):
-        main([
-            'static',
-            '--notebook',
-            'tests/fixtures/notebook_doi.ipynb'
-        ])
+        main(["static", "--notebook", "tests/fixtures/notebook_doi.ipynb"])
 
     def test_should_fail_on_non_notebook(self):
         with pytest.raises(Exception):
-            main([
-                'static',
-                '--notebook',
-                'docs/examples/docstring_doi.py'
-            ])
+            main(["static", "--notebook", "docs/examples/docstring_doi.py"])
diff --git a/tests/test_core.py b/tests/test_core.py
index 5733a3b..ad83aa3 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -86,7 +86,6 @@ def test_add_reference_from_doi(self, bib_with_tracking, decorated_with_doi):
 
 
 class TestAddSource:
-
     def test_add_source_exception_if_not_bibtex(self, bibliography, tmp_path):
         source = tmp_path / "my_source"
         with raises(ValueError):
diff --git a/tests/test_docstring_parser.py b/tests/test_docstring_parser.py
index d3c0baf..9071e34 100644
--- a/tests/test_docstring_parser.py
+++ b/tests/test_docstring_parser.py
@@ -4,12 +4,12 @@
     CodeDocumentComment,
     iter_extract_docstring_from_lines,
     iter_extract_docstring_from_file,
-    iter_extract_docstring_from_files
+    iter_extract_docstring_from_files,
 )
 
 
-DOC_STRING_LINE_1 = 'the docstring line 1'
-DOC_STRING_LINE_2 = 'the docstring line 2'
+DOC_STRING_LINE_1 = "the docstring line 1"
+DOC_STRING_LINE_2 = "the docstring line 2"
 
 
 class TestIterExtractDocstringFromLines:
@@ -17,70 +17,75 @@ def test_should_extract_no_docstrings_from_empty_file(self):
         assert list(iter_extract_docstring_from_lines([])) == []
 
     def test_should_extract_no_docstrings_from_file_without_docstrings(self):
-        assert list(iter_extract_docstring_from_lines([
-            'def some_function():',
-            '    pass'
-        ])) == []
+        assert (
+            list(
+                iter_extract_docstring_from_lines(["def some_function():", "    pass"])
+            )
+            == []
+        )
 
     def test_should_extract_module_level_docstring_using_double_quotes(self):
-        assert list(iter_extract_docstring_from_lines([
-            '"""',
-            DOC_STRING_LINE_1,
-            DOC_STRING_LINE_2,
-            '"""'
-        ])) == [CodeDocumentComment(
-            name=None,
-            lineno=1,
-            text='\n'.join([
-                DOC_STRING_LINE_1,
-                DOC_STRING_LINE_2
-            ])
-        )]
+        assert list(
+            iter_extract_docstring_from_lines(
+                ['"""', DOC_STRING_LINE_1, DOC_STRING_LINE_2, '"""']
+            )
+        ) == [
+            CodeDocumentComment(
+                name=None,
+                lineno=1,
+                text="\n".join([DOC_STRING_LINE_1, DOC_STRING_LINE_2]),
+                package="",
+            )
+        ]
 
     def test_should_extract_function_level_docstring_using_double_quotes(self):
-        assert list(iter_extract_docstring_from_lines([
-            'def some_function():',
-            '    """',
-            '    ' + DOC_STRING_LINE_1,
-            '    ' + DOC_STRING_LINE_2,
-            '    """',
-            '    pass'
-        ])) == [CodeDocumentComment(
-            name='some_function',
-            lineno=1,
-            text='\n'.join([
-                DOC_STRING_LINE_1,
-                DOC_STRING_LINE_2
-            ])
-        )]
+        assert list(
+            iter_extract_docstring_from_lines(
+                [
+                    "def some_function():",
+                    '    """',
+                    "    " + DOC_STRING_LINE_1,
+                    "    " + DOC_STRING_LINE_2,
+                    '    """',
+                    "    pass",
+                ]
+            )
+        ) == [
+            CodeDocumentComment(
+                name="some_function",
+                lineno=1,
+                text="\n".join([DOC_STRING_LINE_1, DOC_STRING_LINE_2]),
+                package="",
+            )
+        ]
 
 
 class TestIterExtractDocstringFromFile:
     def test_should_extract_module_level_docstring_using_double_quotes(
         self, temp_dir: Path
     ):
-        file_path = temp_dir / 'test.py'
-        file_path.write_text('\n'.join([
-            '"""',
-            DOC_STRING_LINE_1,
-            DOC_STRING_LINE_2,
-            '"""'
-        ]))
-        expected_docstrings = [CodeDocumentComment(
-            filename=str(file_path),
-            name=None,
-            lineno=1,
-            text='\n'.join([
-                DOC_STRING_LINE_1,
-                DOC_STRING_LINE_2
-            ])
-        )]
-        assert list(iter_extract_docstring_from_file(
-            str(file_path)
-        )) == expected_docstrings
-        assert list(iter_extract_docstring_from_file(
-            Path(file_path)
-        )) == expected_docstrings
-        assert list(iter_extract_docstring_from_files(
-            [str(file_path)]
-        )) == expected_docstrings
+        file_path = temp_dir / "test.py"
+        file_path.write_text(
+            "\n".join(['"""', DOC_STRING_LINE_1, DOC_STRING_LINE_2, '"""'])
+        )
+        expected_docstrings = [
+            CodeDocumentComment(
+                filename=str(file_path),
+                name=None,
+                lineno=1,
+                text="\n".join([DOC_STRING_LINE_1, DOC_STRING_LINE_2]),
+                package="",
+            )
+        ]
+        assert (
+            list(iter_extract_docstring_from_file(str(file_path)))
+            == expected_docstrings
+        )
+        assert (
+            list(iter_extract_docstring_from_file(Path(file_path)))
+            == expected_docstrings
+        )
+        assert (
+            list(iter_extract_docstring_from_files([str(file_path)]))
+            == expected_docstrings
+        )
diff --git a/tests/test_docstring_reference_parser.py b/tests/test_docstring_reference_parser.py
index 3ac5028..e9df904 100644
--- a/tests/test_docstring_reference_parser.py
+++ b/tests/test_docstring_reference_parser.py
@@ -4,125 +4,94 @@
 from r2t2.docstring_reference_parser import (
     DOCSTRING_SHORT_PURPOSE,
     NOTEBOOK_SHORT_PURPOSE,
-    parse_and_add_docstring_references_from_files
+    parse_and_add_docstring_references_from_files,
 )
 
-DOI_URL_HTTPS_PREFIX = 'https://doi.org/'
+DOI_URL_HTTPS_PREFIX = "https://doi.org/"
 HERE = Path(__file__).parent
 FIXTURES = HERE / "fixtures"
 
-DOI_1 = '10.1234/zenodo.1234567'
-DOI_2 = '10.5281/zenodo.1185316'
-DOI_3 = '10.3435/zenodo.1579823'
+DOI_1 = "10.1234/zenodo.1234567"
+DOI_2 = "10.5281/zenodo.1185316"
+DOI_3 = "10.3435/zenodo.1579823"
 
 
 class TestParseAndAddDocstringReferencesFromFiles:
-    def test_should_parse_docstring_reference(
-        self, temp_dir: Path
-    ):
-        file_path = temp_dir / 'test.py'
-        file_path.write_text('\n'.join([
-            'def some_function():'
-            '    """',
-            '    ' + DOI_1,
-            '    """'
-        ]))
-        biblio = Biblio()
-        parse_and_add_docstring_references_from_files(
-            [file_path],
-            biblio=biblio
+    def test_should_parse_docstring_reference(self, temp_dir: Path):
+        file_path = temp_dir / "test.py"
+        file_path.write_text(
+            "\n".join(["def some_function():" '    """', "    " + DOI_1, '    """'])
         )
-        expected_identifier = '{source}:{name}:{line_num}'.format(
+        biblio = Biblio()
+        parse_and_add_docstring_references_from_files([file_path], biblio=biblio)
+        expected_identifier = "{source}:{name}:{line_num}".format(
             source=str(file_path),
-            name='some_function',
+            name="some_function",
             line_num=1,
         )
         assert biblio.keys() == {expected_identifier}
         function_reference = biblio[expected_identifier]
-        assert function_reference.name == 'some_function'
+        assert function_reference.name == "some_function"
         assert function_reference.source == str(file_path)
         assert function_reference.line == 1
         assert function_reference.references == [DOI_URL_HTTPS_PREFIX + DOI_1]
         assert function_reference.short_purpose == [DOCSTRING_SHORT_PURPOSE]
 
-    def test_should_not_override_existing_reference(
-        self, temp_dir: Path
-    ):
-        file_path = temp_dir / 'test.py'
-        file_path.write_text('\n'.join([
-            'def some_function():'
-            '    """',
-            '    ' + DOI_1,
-            '    """'
-        ]))
-        expected_identifier = '{source}:{name}:{line_num}'.format(
+    def test_should_not_override_existing_reference(self, temp_dir: Path):
+        file_path = temp_dir / "test.py"
+        file_path.write_text(
+            "\n".join(["def some_function():" '    """', "    " + DOI_1, '    """'])
+        )
+        expected_identifier = "{source}:{name}:{line_num}".format(
             source=str(file_path),
-            name='some_function',
+            name="some_function",
             line_num=1,
         )
         biblio = Biblio()
         existing_function_reference = FunctionReference(
-            name='other',
-            source='other.py',
+            name="other",
+            source="other.py",
             line=-1,
-            short_purpose=['For testing'],
-            references=['test/123']
+            package=",",
+            short_purpose=["For testing"],
+            references=["test/123"],
         )
         biblio[expected_identifier] = existing_function_reference
-        parse_and_add_docstring_references_from_files(
-            [file_path],
-            biblio=biblio
-        )
+        parse_and_add_docstring_references_from_files([file_path], biblio=biblio)
         assert biblio == {expected_identifier: existing_function_reference}
 
-    def test_should_not_add_function_reference_without_references(
-        self, temp_dir: Path
-    ):
-        file_path = temp_dir / 'test.py'
-        file_path.write_text('\n'.join([
-            'def some_function():'
-            '    """',
-            '    some docstring',
-            '    """'
-        ]))
-        biblio = Biblio()
-        parse_and_add_docstring_references_from_files(
-            [file_path],
-            biblio=biblio
+    def test_should_not_add_function_reference_without_references(self, temp_dir: Path):
+        file_path = temp_dir / "test.py"
+        file_path.write_text(
+            "\n".join(
+                ["def some_function():" '    """', "    some docstring", '    """']
+            )
         )
+        biblio = Biblio()
+        parse_and_add_docstring_references_from_files([file_path], biblio=biblio)
         assert not biblio
 
-    def test_should_parse_notebook_references(
-        self
-    ):
+    def test_should_parse_notebook_references(self):
         file_path = FIXTURES / "notebook_doi.ipynb"
         biblio = Biblio()
-        parse_and_add_docstring_references_from_files(
-            [file_path],
-            biblio=biblio
-        )
-        identifiers = '{source}:{name}:{line_num}'
-        names = ['cell_0', 'cell_4']
+        parse_and_add_docstring_references_from_files([file_path], biblio=biblio)
+        identifiers = "{source}:{name}:{line_num}"
+        names = ["cell_0", "cell_4"]
         dois = [[DOI_2, DOI_3], [DOI_1]]
         expected_identifiers = [
-            identifiers.format(
-                source=str(file_path),
-                name=names[0],
-                line_num='n/a'
-            ),
-            identifiers.format(
-                source=str(file_path),
-                name=names[1],
-                line_num='n/a'
-            ),
+            identifiers.format(source=str(file_path), name=names[0], line_num="n/a"),
+            identifiers.format(source=str(file_path), name=names[1], line_num="n/a"),
         ]
         assert biblio.keys() == set(expected_identifiers)
         for i, identifier in enumerate(expected_identifiers):
             function_reference = biblio[identifier]
             assert function_reference.name == names[i]
             assert function_reference.source == str(file_path)
-            assert function_reference.line == 'n/a'
-            assert function_reference.references == [DOI_URL_HTTPS_PREFIX + d
-                                                     for d in dois[i]]
-            assert function_reference.short_purpose == ([NOTEBOOK_SHORT_PURPOSE]
-                                                        * len(dois[i]))
+            assert function_reference.line == "n/a"
+            assert function_reference.package == ""
+            assert function_reference.references == [
+                DOI_URL_HTTPS_PREFIX + d for d in dois[i]
+            ]
+            assert function_reference.short_purpose == (
+                [NOTEBOOK_SHORT_PURPOSE] * len(dois[i])
+            )
diff --git a/tests/test_plain_text_parser.py b/tests/test_plain_text_parser.py
index 34c7354..5ab329f 100644
--- a/tests/test_plain_text_parser.py
+++ b/tests/test_plain_text_parser.py
@@ -1,70 +1,64 @@
-from r2t2.plain_text_parser import (
-    parse_plain_text_references
-)
+from r2t2.plain_text_parser import parse_plain_text_references
 
 
-DOI_URL_HTTP_PREFIX = 'http://doi.org/'
-DOI_URL_HTTPS_PREFIX = 'https://doi.org/'
+DOI_URL_HTTP_PREFIX = "http://doi.org/"
+DOI_URL_HTTPS_PREFIX = "https://doi.org/"
 
-DOI_1 = '10.1234/zenodo.1234567'
+DOI_1 = "10.1234/zenodo.1234567"
 
-REF_NAME_1 = 'Smith2001-1'
-REF_NAME_2 = 'Johnson2002-2'
+REF_NAME_1 = "Smith2001-1"
+REF_NAME_2 = "Johnson2002-2"
 
 
 class TestParsePlainTextReferences:
     def test_should_return_empty_list_for_empty_string(self):
-        assert parse_plain_text_references('') == []
+        assert parse_plain_text_references("") == []
 
     def test_should_return_empty_list_for_text_without_reference(self):
-        assert parse_plain_text_references('description of some function') == []
+        assert parse_plain_text_references("description of some function") == []
 
     def test_should_parse_doi_without_additional_text(self):
-        assert parse_plain_text_references(
-            DOI_1
-        ) == [DOI_URL_HTTPS_PREFIX + DOI_1]
+        assert parse_plain_text_references(DOI_1) == [DOI_URL_HTTPS_PREFIX + DOI_1]
 
     def test_should_parse_doi_with_surround_text(self):
-        assert parse_plain_text_references(
-            'DOI: ' + DOI_1 + ' used for xyz'
-        ) == [DOI_URL_HTTPS_PREFIX + DOI_1]
+        assert parse_plain_text_references("DOI: " + DOI_1 + " used for xyz") == [
+            DOI_URL_HTTPS_PREFIX + DOI_1
+        ]
 
     def test_should_parse_doi_http_url(self):
-        assert parse_plain_text_references(
-            DOI_URL_HTTP_PREFIX + DOI_1
-        ) == [DOI_URL_HTTPS_PREFIX + DOI_1]
+        assert parse_plain_text_references(DOI_URL_HTTP_PREFIX + DOI_1) == [
+            DOI_URL_HTTPS_PREFIX + DOI_1
+        ]
 
     def test_should_parse_doi_https_url(self):
-        assert parse_plain_text_references(
-            DOI_URL_HTTP_PREFIX + DOI_1
-        ) == [DOI_URL_HTTPS_PREFIX + DOI_1]
+        assert parse_plain_text_references(DOI_URL_HTTPS_PREFIX + DOI_1) == [
+            DOI_URL_HTTPS_PREFIX + DOI_1
+        ]
 
     def test_should_parse_single_sphinx_cite_reference(self):
         assert parse_plain_text_references(
-            r'See :cite:`' + REF_NAME_1 + r'` for an introduction'
+            r"See :cite:`" + REF_NAME_1 + r"` for an introduction"
         ) == [REF_NAME_1]
 
     def test_should_parse_multiple_sphinx_cite_references(self):
         assert parse_plain_text_references(
-            r'See :cite:`' + REF_NAME_1 + ', ' + REF_NAME_2 + r'` for an introduction'
+            r"See :cite:`" + REF_NAME_1 + ", " + REF_NAME_2 + r"` for an introduction"
         ) == [REF_NAME_1, REF_NAME_2]
 
     def test_should_parse_single_latex_cite_reference(self):
-        assert parse_plain_text_references(
-            r'\\cite{' + REF_NAME_1 + r'}'
-        ) == [REF_NAME_1]
+        assert parse_plain_text_references(r"\\cite{" + REF_NAME_1 + r"}") == [
+            REF_NAME_1
+        ]
 
     def test_should_parse_multiple_latex_cite_reference(self):
         assert parse_plain_text_references(
-            r'\\cite{' + REF_NAME_1 + ', ' + REF_NAME_2 + r'}'
+            r"\\cite{" + REF_NAME_1 + ", " + REF_NAME_2 + r"}"
         ) == [REF_NAME_1, REF_NAME_2]
 
     def test_should_parse_latex_cite_reference_with_page_number(self):
-        assert parse_plain_text_references(
-            r'\\cite[p.~215]{' + REF_NAME_1 + r'}'
-        ) == [REF_NAME_1]
+        assert parse_plain_text_references(r"\\cite[p.~215]{" + REF_NAME_1 + r"}") == [
+            REF_NAME_1
+        ]
 
     def test_should_parse_single_doxygen_cite_reference(self):
-        assert parse_plain_text_references(
-            r'\\cite ' + REF_NAME_1
-        ) == [REF_NAME_1]
+        assert parse_plain_text_references(r"\\cite " + REF_NAME_1) == [REF_NAME_1]

From 23bb1bc8151ef17e54e91f38e0478070182cd28e Mon Sep 17 00:00:00 2001
From: Diego <d.alonso-alvarez@imperial.ac.uk>
Date: Thu, 3 Sep 2020 20:46:25 +0100
Subject: [PATCH 06/13] Add process references

---
 r2t2/core.py    | 42 +++++++++++++++++++++++++++++++++++++++---
 r2t2/doi2bib.py | 19 +++++++++++++++++++
 2 files changed, 58 insertions(+), 3 deletions(-)
 create mode 100644 r2t2/doi2bib.py

diff --git a/r2t2/core.py b/r2t2/core.py
index e36ab0b..5f94efa 100755
--- a/r2t2/core.py
+++ b/r2t2/core.py
@@ -3,9 +3,12 @@
 from typing import NamedTuple, List, Optional, Callable, Dict, Union
 from functools import reduce
 from pathlib import Path
+from warnings import warn
 
 import bibtexparser as bp
 
+from .doi2bib import doi2bib
+
 
 class FunctionReference(NamedTuple):
     name: str
@@ -88,10 +91,43 @@ def load_source(self, package: str) -> None:
         with self._sources[package].open() as f:
             self._sources_loaded[package] = bp.load(f)
 
-    def save_source(self, package: str) -> None:
-        """Saves the (possibly modified) source for the given package."""
+    def add_entry_to_source(self, entry: dict, package: str) -> None:
+        """Add entry to source and save it source for the given package."""
+        self._sources_loaded[package].entries.append(entry)
         with self._sources[package].open() as f:
-             bp.dump(self._sources_loaded[package], f)
+            bp.dump(self._sources_loaded[package], f)
+
+    def process_ref(self, ref: FunctionReference) -> str:
+        if ref.package not in self._sources_loaded:
+            self.load_source(ref.package)
+
+        for refstr in ref.references:
+            if refstr.startswith("[plain]"):
+                return refstr.strip("[plain]")
+
+            elif refstr.startswith("[bibkey]"):
+                return self._sources_loaded[ref.package].entries_dict[
+                    refstr.strip("[bibkey]")
+                ]
+
+            elif refstr.startswith("[doi]"):
+                for entry in self._sources_loaded[ref.package].entries:
+                    out = entry if entry.get("doi") == refstr.strip("[doi]") else None
+                    if out:
+                        db = bp.bibdatabase.BibDatabase()
+                        db.entries = [out]
+                        return bp.dumps(db)
+
+                out = doi2bib(refstr.strip("[doi]"))
+                if out:
+                    self.add_entry_to_source(bp.loads(out), ref.package)
+                    return out
+
+                warn(
+                    f"Reference with doi={refstr.strip('[doi]')} not found!",
+                    UserWarning,
+                )
+                return ""
 
 
 BIBLIOGRAPHY: Biblio = Biblio()
diff --git a/r2t2/doi2bib.py b/r2t2/doi2bib.py
new file mode 100644
index 0000000..c48c1b4
--- /dev/null
+++ b/r2t2/doi2bib.py
@@ -0,0 +1,19 @@
+"""The following code has been adapted from:
+
+https://gist.github.com/jrsmith3/5513926
+"""
+
+import requests
+from warnings import warn
+from typing import Optional
+
+
+def doi2bib(doi) -> Optional[str]:
+    """Return the BibTeX text for a DOI, or None (after a warning) if not found."""
+    url = doi if "doi.org" in doi else f"https://doi.org/{doi}"
+    # Request BibTeX; the timeout prevents hanging forever (requests has no default).
+    r = requests.get(url, headers={"accept": "application/x-bibtex"}, timeout=10)
+    if not r.ok or "DOI Not Found" in r.text:  # HTTP error or not-found page
+        warn(f"Reference with doi={doi} not found!", UserWarning)
+        return None
+    return r.text

From cad960bdb027f6684e9a0058fc1079a90e793f5b Mon Sep 17 00:00:00 2001
From: Diego <d.alonso-alvarez@imperial.ac.uk>
Date: Thu, 3 Sep 2020 20:55:34 +0100
Subject: [PATCH 07/13] Change output of process_ref to list

---
 r2t2/core.py | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/r2t2/core.py b/r2t2/core.py
index 5f94efa..93c15d3 100755
--- a/r2t2/core.py
+++ b/r2t2/core.py
@@ -97,37 +97,45 @@ def add_entry_to_source(self, entry: dict, package: str) -> None:
         with self._sources[package].open() as f:
             bp.dump(self._sources_loaded[package], f)
 
-    def process_ref(self, ref: FunctionReference) -> str:
+    def process_ref(self, ref: FunctionReference) -> List[str]:
+        """Process the reference keys and retrieves the full information."""
         if ref.package not in self._sources_loaded:
             self.load_source(ref.package)
 
+        processed = []
         for refstr in ref.references:
             if refstr.startswith("[plain]"):
-                return refstr.strip("[plain]")
+                processed.append(refstr.strip("[plain]"))
 
-            elif refstr.startswith("[bibkey]"):
-                return self._sources_loaded[ref.package].entries_dict[
-                    refstr.strip("[bibkey]")
-                ]
+            elif refstr.startswith("[bibtex]"):
+                processed.append(self._sources_loaded[ref.package].entries_dict[
+                    refstr.strip("[bibtex]")
+                ])
 
             elif refstr.startswith("[doi]"):
+                out = None
                 for entry in self._sources_loaded[ref.package].entries:
                     out = entry if entry.get("doi") == refstr.strip("[doi]") else None
                     if out:
                         db = bp.bibdatabase.BibDatabase()
                         db.entries = [out]
-                        return bp.dumps(db)
+                        processed.append(bp.dumps(db))
+                        break
 
-                out = doi2bib(refstr.strip("[doi]"))
                 if out:
-                    self.add_entry_to_source(bp.loads(out), ref.package)
-                    return out
+                    continue
+                else:
+                    out = doi2bib(refstr.strip("[doi]"))
+                    if out:
+                        self.add_entry_to_source(bp.loads(out), ref.package)
+                        processed.append(out)
+                        continue
 
                 warn(
                     f"Reference with doi={refstr.strip('[doi]')} not found!",
                     UserWarning,
                 )
-                return ""
+                return [""]
 
 
 BIBLIOGRAPHY: Biblio = Biblio()

From 7bc551b3bc8f877e9f6402829be5fc334a206043 Mon Sep 17 00:00:00 2001
From: Diego <d.alonso-alvarez@imperial.ac.uk>
Date: Sun, 6 Sep 2020 16:11:04 +0100
Subject: [PATCH 08/13] Add register_process decorator

---
 r2t2/core.py | 131 +++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 96 insertions(+), 35 deletions(-)

diff --git a/r2t2/core.py b/r2t2/core.py
index 93c15d3..2b01edc 100755
--- a/r2t2/core.py
+++ b/r2t2/core.py
@@ -1,7 +1,7 @@
 import inspect
 import wrapt
 from typing import NamedTuple, List, Optional, Callable, Dict, Union
-from functools import reduce
+from functools import reduce, partial
 from pathlib import Path
 from warnings import warn
 
@@ -21,6 +21,26 @@ class FunctionReference(NamedTuple):
 
 class Biblio(dict):
     track_references: bool = False
+    _processes: Dict[str, Callable] = {}
+
+    @classmethod
+    def register_process(cls, f: Optional[Callable] = None, name: Optional[str] = None):
+        """Register a function for processing references in the registry.
+
+        Args:
+            f: Function to process references.
+            name (str): Name of the type of reference to process, e.g. plain, doi...
+
+        Returns:
+            The same input function
+        """
+        if f is None:
+            return partial(cls.register_process, name=name)
+
+        name = name if name else f.__name__
+
+        cls._processes[name] = f
+        return f
 
     def __init__(self):
         super().__init__()
@@ -91,6 +111,12 @@ def load_source(self, package: str) -> None:
         with self._sources[package].open() as f:
             self._sources_loaded[package] = bp.load(f)
 
+    def get_source(self, package: str) -> bp.bibdatabase.BibDatabase:
+        """Provide the requested sources database."""
+        if package not in self._sources_loaded:
+            self.load_source(package)
+        return self._sources_loaded[package]
+
     def add_entry_to_source(self, entry: dict, package: str) -> None:
         """Add entry to source and save it source for the given package."""
         self._sources_loaded[package].entries.append(entry)
@@ -99,43 +125,14 @@ def add_entry_to_source(self, entry: dict, package: str) -> None:
 
     def process_ref(self, ref: FunctionReference) -> List[str]:
         """Process the reference keys and retrieves the full information."""
-        if ref.package not in self._sources_loaded:
-            self.load_source(ref.package)
+        self.get_source(ref.package)
 
         processed = []
         for refstr in ref.references:
-            if refstr.startswith("[plain]"):
-                processed.append(refstr.strip("[plain]"))
-
-            elif refstr.startswith("[bibtex]"):
-                processed.append(self._sources_loaded[ref.package].entries_dict[
-                    refstr.strip("[bibtex]")
-                ])
-
-            elif refstr.startswith("[doi]"):
-                out = None
-                for entry in self._sources_loaded[ref.package].entries:
-                    out = entry if entry.get("doi") == refstr.strip("[doi]") else None
-                    if out:
-                        db = bp.bibdatabase.BibDatabase()
-                        db.entries = [out]
-                        processed.append(bp.dumps(db))
-                        break
-
-                if out:
-                    continue
-                else:
-                    out = doi2bib(refstr.strip("[doi]"))
-                    if out:
-                        self.add_entry_to_source(bp.loads(out), ref.package)
-                        processed.append(out)
-                        continue
-
-                warn(
-                    f"Reference with doi={refstr.strip('[doi]')} not found!",
-                    UserWarning,
-                )
-                return [""]
+            rtype, rstr = refstr.strip("[").split("]", 1)
+            processed.append(self._processes[rtype](rstr, ref.package))
+
+        return processed
 
 
 BIBLIOGRAPHY: Biblio = Biblio()
@@ -194,3 +191,67 @@ def wrapper(wrapped, instance, args, kwargs):
         return wrapped(*args, **kwargs)
 
     return wrapper
+
+
+@Biblio.register_process("plain")
+def process_plain(ref: str, *args, **kwargs) -> str:
+    """ Process a plain string reference. Dummy function.
+
+    Args:
+        ref (str): The input reference string
+
+    Returns:
+        The same input string.
+    """
+    return ref
+
+
+@Biblio.register_process("bibtex")
+def process_bibtex(ref: str, package: str, *args, **kwargs) -> str:
+    """ Process a bibtex key reference.
+
+    Args:
+        ref (str): The bibtex key.
+        package (str): The package from where to get the reference from.
+
+    Raises:
+        KeyError: If the reference source for that package does not contain the
+            requested key.
+    Returns:
+        A dictionary with the reference full information
+    """
+    return BIBLIOGRAPHY.get_source(package).entries_dict[ref]
+
+
+@Biblio.register_process("doi")
+def process_doi(ref: str, package: str, *args, **kwargs) -> str:
+    """ Process a doi key reference.
+
+    First, it will look for the reference in the database for the given package. If it
+    is not found there, it will be retrieved from the internet. If successful, the
+    reference will be added to the database, so future requests to access this reference
+    will be local.
+
+    Args:
+        ref (str): The doi of the reference.
+        package (str): The package from where to get the reference from in the first
+        instance and where to save the reference after getting it from the internet.
+
+    Returns:
+        A dictionary with the reference full information
+    """
+    db = BIBLIOGRAPHY.get_source(package)
+    for entry in db.entries:
+        out = entry if entry.get("doi") == ref else None
+        if out:
+            return out
+
+    out = doi2bib(ref)
+    if out:
+        BIBLIOGRAPHY.add_entry_to_source(bp.loads(out), package)
+        return db.entries[-1]
+
+    warn(
+        f"Reference with doi={ref} not found!", UserWarning,
+    )
+    return ""

From 42a12e2f6c86919394a8b108113939b1731715bf Mon Sep 17 00:00:00 2001
From: Diego <d.alonso-alvarez@imperial.ac.uk>
Date: Sun, 6 Sep 2020 16:17:10 +0100
Subject: [PATCH 09/13] Modify process_plain to return a dict

---
 r2t2/core.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/r2t2/core.py b/r2t2/core.py
index 2b01edc..ddf5839 100755
--- a/r2t2/core.py
+++ b/r2t2/core.py
@@ -123,7 +123,7 @@ def add_entry_to_source(self, entry: dict, package: str) -> None:
         with self._sources[package].open() as f:
             bp.dump(self._sources_loaded[package], f)
 
-    def process_ref(self, ref: FunctionReference) -> List[str]:
+    def process_ref(self, ref: FunctionReference) -> List[Dict]:
         """Process the reference keys and retrieves the full information."""
         self.get_source(ref.package)
 
@@ -194,20 +194,21 @@ def wrapper(wrapped, instance, args, kwargs):
 
 
 @Biblio.register_process("plain")
-def process_plain(ref: str, *args, **kwargs) -> str:
+def process_plain(ref: str, *args, **kwargs) -> Dict:
     """ Process a plain string reference. Dummy function.
 
     Args:
         ref (str): The input reference string
 
     Returns:
-        The same input string.
+        A dictionary with the reference string as "title", a unique ID equal to the hash
+        of the reference string and an "ENTRYTYPE" equal to "misc".
     """
-    return ref
+    return {"ID": hash(ref), "ENTRYTYPE": "misc", "title": ref}
 
 
 @Biblio.register_process("bibtex")
-def process_bibtex(ref: str, package: str, *args, **kwargs) -> str:
+def process_bibtex(ref: str, package: str, *args, **kwargs) -> Dict:
     """ Process a bibtex key reference.
 
     Args:
@@ -224,7 +225,7 @@ def process_bibtex(ref: str, package: str, *args, **kwargs) -> str:
 
 
 @Biblio.register_process("doi")
-def process_doi(ref: str, package: str, *args, **kwargs) -> str:
+def process_doi(ref: str, package: str, *args, **kwargs) -> Dict:
     """ Process a doi key reference.
 
     First, it will look for the reference in the database for the given package. If it
@@ -254,4 +255,4 @@ def process_doi(ref: str, package: str, *args, **kwargs) -> str:
     warn(
         f"Reference with doi={ref} not found!", UserWarning,
     )
-    return ""
+    return {}

From 93414fa5940eb52cda9ce81ef8ea002f3abba9d6 Mon Sep 17 00:00:00 2001
From: Diego <d.alonso-alvarez@imperial.ac.uk>
Date: Sun, 6 Sep 2020 16:19:55 +0100
Subject: [PATCH 10/13] Change warning to error in process_doi

---
 r2t2/core.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/r2t2/core.py b/r2t2/core.py
index ddf5839..52bd17d 100755
--- a/r2t2/core.py
+++ b/r2t2/core.py
@@ -218,6 +218,7 @@ def process_bibtex(ref: str, package: str, *args, **kwargs) -> Dict:
     Raises:
         KeyError: If the reference source for that package does not contain the
             requested key.
+
     Returns:
         A dictionary with the reference full information
     """
@@ -236,7 +237,10 @@ def process_doi(ref: str, package: str, *args, **kwargs) -> Dict:
     Args:
         ref (str): The doi of the reference.
         package (str): The package from where to get the reference from in the first
-        instance and where to save the reference after getting it from the internet.
+        instance and where to save the reference after getting it online.
+
+    Raises:
+        KeyError: If the reference is found neither in the database nor online.
 
     Returns:
         A dictionary with the reference full information
@@ -252,7 +256,4 @@ def process_doi(ref: str, package: str, *args, **kwargs) -> Dict:
         BIBLIOGRAPHY.add_entry_to_source(bp.loads(out), package)
         return db.entries[-1]
 
-    warn(
-        f"Reference with doi={ref} not found!", UserWarning,
-    )
-    return {}
+    raise KeyError(f"Reference with doi={ref} not found in the database neither online")

From d36d29f5a32bc4c6a98007f664883173a7faae63 Mon Sep 17 00:00:00 2001
From: Diego <d.alonso-alvarez@imperial.ac.uk>
Date: Sun, 6 Sep 2020 16:23:00 +0100
Subject: [PATCH 11/13] Remove unused package import

---
 r2t2/core.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/r2t2/core.py b/r2t2/core.py
index 52bd17d..6ef7413 100755
--- a/r2t2/core.py
+++ b/r2t2/core.py
@@ -3,7 +3,6 @@
 from typing import NamedTuple, List, Optional, Callable, Dict, Union
 from functools import reduce, partial
 from pathlib import Path
-from warnings import warn
 
 import bibtexparser as bp
 

From a4a3cd74fbdc23fc82c8e9a8b288f0cf15e23c81 Mon Sep 17 00:00:00 2001
From: Diego <d.alonso-alvarez@imperial.ac.uk>
Date: Sun, 6 Sep 2020 16:26:46 +0100
Subject: [PATCH 12/13] Add requests as dependency

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 58b7cdc..88178bc 100644
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,7 @@
     author="Research Computing Service, Imperial College London",
     author_email="rcs-support@imperial.ac.uk",
     url="https://github.com/ImperialCollegeLondon/R2T2",
-    install_requires=["wrapt", "bibtexparser"],
+    install_requires=["wrapt", "bibtexparser", "requests"],
     tests_require=tests_require,
     extras_require={"dev": tests_require + precommit},
     packages=find_packages("."),

From 6611f2323490527f0eca7758e33c4c9d5398de6d Mon Sep 17 00:00:00 2001
From: Diego <d.alonso-alvarez@imperial.ac.uk>
Date: Mon, 7 Sep 2020 05:38:00 +0100
Subject: [PATCH 13/13] Use keywords in register process

---
 r2t2/core.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/r2t2/core.py b/r2t2/core.py
index 6ef7413..1e69e30 100755
--- a/r2t2/core.py
+++ b/r2t2/core.py
@@ -192,7 +192,7 @@ def wrapper(wrapped, instance, args, kwargs):
     return wrapper
 
 
-@Biblio.register_process("plain")
+@Biblio.register_process(name="plain")
 def process_plain(ref: str, *args, **kwargs) -> Dict:
     """ Process a plain string reference. Dummy function.
 
@@ -206,7 +206,7 @@ def process_plain(ref: str, *args, **kwargs) -> Dict:
     return {"ID": hash(ref), "ENTRYTYPE": "misc", "title": ref}
 
 
-@Biblio.register_process("bibtex")
+@Biblio.register_process(name="bibtex")
 def process_bibtex(ref: str, package: str, *args, **kwargs) -> Dict:
     """ Process a bibtex key reference.
 
@@ -224,7 +224,7 @@ def process_bibtex(ref: str, package: str, *args, **kwargs) -> Dict:
     return BIBLIOGRAPHY.get_source(package).entries_dict[ref]
 
 
-@Biblio.register_process("doi")
+@Biblio.register_process(name="doi")
 def process_doi(ref: str, package: str, *args, **kwargs) -> Dict:
     """ Process a doi key reference.