From 23964b86935b1fcd65b8a1af4b5581a70b641239 Mon Sep 17 00:00:00 2001 From: Diego Date: Thu, 3 Sep 2020 16:10:32 +0100 Subject: [PATCH 01/13] Add package to FunctionReference --- r2t2/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/r2t2/core.py b/r2t2/core.py index 210857d..a1cb60e 100755 --- a/r2t2/core.py +++ b/r2t2/core.py @@ -8,6 +8,7 @@ class FunctionReference(NamedTuple): name: str line: int source: str + package: str short_purpose: List[str] references: List[str] @@ -80,13 +81,14 @@ def wrapper(wrapped, instance, args, kwargs): source = inspect.getsourcefile(wrapped) line = inspect.getsourcelines(wrapped)[1] identifier = f"{source}:{line}" + package = inspect.getmodule(inspect.stack()[1][0]).__name__.split(".")[0] if identifier in BIBLIOGRAPHY and ref in BIBLIOGRAPHY[identifier].references: return wrapped(*args, **kwargs) if identifier not in BIBLIOGRAPHY: BIBLIOGRAPHY[identifier] = FunctionReference(wrapped.__name__, line, source, - [], []) + package, [], []) BIBLIOGRAPHY[identifier].short_purpose.append(short_purpose) BIBLIOGRAPHY[identifier].references.append(ref) From 9dde01a4ebceafcd339156cd37f02110331e2935 Mon Sep 17 00:00:00 2001 From: Diego Date: Thu, 3 Sep 2020 16:48:16 +0100 Subject: [PATCH 02/13] Add load of reference sources --- r2t2/core.py | 9 +++++++++ setup.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/r2t2/core.py b/r2t2/core.py index 98448ca..810a180 100755 --- a/r2t2/core.py +++ b/r2t2/core.py @@ -4,6 +4,8 @@ from functools import reduce from pathlib import Path +import bibtexparser as bp + class FunctionReference(NamedTuple): name: str @@ -20,6 +22,7 @@ class Biblio(dict): def __init__(self): super().__init__() self._sources: Dict[str, Path] = {} + self._sources_loaded: Dict[str, bp.bibdatabase.BibDatabase] = {} def __str__(self): def add_record(out, record): @@ -38,6 +41,7 @@ def add_record(out, record): def clear(self) -> None: super().clear() self._sources.clear() + self._sources_loaded.clear() @property def references(self): @@ -79,6 +83,11 @@ def add_source(self, source: Union[str, Path]) -> None: ) self._sources[package] = src + def load_source(self, package: str) -> None: + """Open the source for the given package.""" + with self._sources[package].open("w+") as f: + self._sources_loaded[package] = bp.load(f) + BIBLIOGRAPHY: Biblio = Biblio() diff --git a/setup.py b/setup.py index 3df903a..58b7cdc 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ author="Research Computing Service, Imperial College London", author_email="rcs-support@imperial.ac.uk", url="https://github.com/ImperialCollegeLondon/R2T2", - install_requires=["wrapt"], + install_requires=["wrapt", "bibtexparser"], tests_require=tests_require, extras_require={"dev": tests_require + precommit}, packages=find_packages("."), From a44a5c8446ca4b63287bd782701c64c5e095f3d7 Mon Sep 17 00:00:00 2001 From: Diego Date: Thu, 3 Sep 2020 17:06:11 +0100 Subject: [PATCH 03/13] Add test for load source --- tests/test_core.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tests/test_core.py b/tests/test_core.py index 5733a3b..7583203 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -86,7 +86,6 @@ def test_add_reference_from_doi(self, bib_with_tracking, decorated_with_doi): class TestAddSource: - def test_add_source_exception_if_not_bibtex(self, bibliography, tmp_path): source = tmp_path / "my_source" with raises(ValueError): @@ -110,3 +109,23 @@ def test_add_source(self, bibliography, tmp_path): bibliography.add_source(source) assert "tests" in bibliography._sources assert bibliography._sources["tests"] == source + + +class TestLoadSource: + + def test_load_source(self, bibliography, tmp_path): + source = tmp_path / "my_source.bib" + ref = """@misc{sulzer_marquis_timms_robinson_chapman_2020, + title={Python Battery Mathematical Modelling (PyBaMM)}, + DOI={10.1149/osf.io/67ckj}, + publisher={ECSarXiv}, + author={Sulzer, Valentin and Marquis, Scott G and Timms, Robert and Robinson, Martin and Chapman, S. J}, # noqa: E501 + year={2020}, +} +}""" + with source.open("w") as f: + f.write(ref) + + bibliography.add_source(source) + bibliography.load_source("tests") + assert "tests" in bibliography._sources_loaded From 6c4e2301dee632b5214effda0f6b2be519a8763a Mon Sep 17 00:00:00 2001 From: Diego Date: Thu, 3 Sep 2020 18:47:02 +0100 Subject: [PATCH 04/13] Add load and save source --- r2t2/core.py | 7 ++++++- tests/test_core.py | 20 +++++++++++++------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/r2t2/core.py b/r2t2/core.py index 810a180..9200e2a 100755 --- a/r2t2/core.py +++ b/r2t2/core.py @@ -85,9 +85,14 @@ def add_source(self, source: Union[str, Path]) -> None: def load_source(self, package: str) -> None: """Open the source for the given package.""" - with self._sources[package].open("w+") as f: + with self._sources[package].open() as f: self._sources_loaded[package] = bp.load(f) + def save_source(self, package: str) -> None: + """Saves the (possibly modified) source for the given package.""" + with self._sources[package].open() as f: + bp.dump(self._sources_loaded[package], f) + BIBLIOGRAPHY: Biblio = Biblio() diff --git a/tests/test_core.py b/tests/test_core.py index 7583203..5e98ab4 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -115,17 +115,23 @@ class TestLoadSource: def test_load_source(self, bibliography, tmp_path): source = tmp_path / "my_source.bib" - ref = """@misc{sulzer_marquis_timms_robinson_chapman_2020, - title={Python Battery Mathematical Modelling (PyBaMM)}, - DOI={10.1149/osf.io/67ckj}, - publisher={ECSarXiv}, - author={Sulzer, Valentin and Marquis, Scott G and Timms, Robert and Robinson, Martin and Chapman, S. J}, # noqa: E501 - year={2020}, + ref = """@article{Sulzer_2019, + doi = {10.1149/2.0441908jes}, + url = {https://doi.org/10.1149%2F2.0441908jes}, + year = 2019, + publisher = {The Electrochemical Society}, + volume = {166}, + number = {12}, + pages = {A2372--A2382}, + author = {Valentin Sulzer et al.}, + title = {Faster Lead-Acid Battery Simulations from Porous-Electrode Theory}, + journal = {Journal of The Electrochemical Society}, } -}""" +""" with source.open("w") as f: f.write(ref) bibliography.add_source(source) bibliography.load_source("tests") assert "tests" in bibliography._sources_loaded + assert "Sulzer_2019" in bibliography._sources_loaded["tests"].entries_dict From 0b58cfe906e771a0b7a5bb7d580a7f4120cfc5e5 Mon Sep 17 00:00:00 2001 From: Diego Date: Thu, 3 Sep 2020 19:34:09 +0100 Subject: [PATCH 05/13] Add package attribute awareness to the parser --- docs/conf.py | 19 ++-- docs/examples/minimal-class.py | 12 ++- docs/examples/minimal-method.py | 13 +-- docs/examples/minimal.py | 10 +- r2t2/__main__.py | 33 +++--- r2t2/core.py | 10 +- r2t2/docstring_parser.py | 34 +++--- r2t2/docstring_reference_parser.py | 27 ++--- r2t2/plain_text_parser.py | 12 +-- r2t2/runtime_tracker.py | 3 +- r2t2/static_parser.py | 8 +- tests/conftest.py | 6 +- tests/test__main__.py | 59 +++-------- tests/test_core.py | 1 - tests/test_docstring_parser.py | 125 +++++++++++----------- tests/test_docstring_reference_parser.py | 129 +++++++++-------------- tests/test_plain_text_parser.py | 62 +++++------ 17 files changed, 246 insertions(+), 317 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 2072d51..9c7335e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -17,12 +17,12 @@ # -- Project information ----------------------------------------------------- -project = 'Research References Tracking Tool (R2T2)' -copyright = '2020, Research Computing Service, Imperial College London' -author = 'Research Computing Service, Imperial College London' +project = "Research References Tracking Tool (R2T2)" +copyright = "2020, Research Computing Service, Imperial College London" +author = "Research Computing Service, Imperial College London" # The full version, including alpha/beta/rc tags -release = '0.3.1' +release = "0.3.1" # -- General configuration --------------------------------------------------- @@ -30,16 +30,15 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = [ -] +extensions = [] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The master toctree document. master_doc = "index" @@ -49,9 +48,9 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'alabaster' +html_theme = "alabaster" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] diff --git a/docs/examples/minimal-class.py b/docs/examples/minimal-class.py index 420c733..2761c96 100644 --- a/docs/examples/minimal-class.py +++ b/docs/examples/minimal-class.py @@ -1,9 +1,11 @@ from r2t2 import add_reference -@add_reference(short_purpose="Original implementation of R2T2", - reference="Diego Alonso-Álvarez, et al." - "(2018, February 27). Solcore (Version 5.1.0). Zenodo." - "http://doi.org/10.5281/zenodo.1185316") -class MyGreatClass(): +@add_reference( + short_purpose="Original implementation of R2T2", + reference="Diego Alonso-Álvarez, et al." + "(2018, February 27). Solcore (Version 5.1.0). Zenodo." + "http://doi.org/10.5281/zenodo.1185316", +) +class MyGreatClass: pass diff --git a/docs/examples/minimal-method.py b/docs/examples/minimal-method.py index 7198662..74c4704 100644 --- a/docs/examples/minimal-method.py +++ b/docs/examples/minimal-method.py @@ -1,11 +1,12 @@ from r2t2 import add_reference -class MyGreatClass(): - - @add_reference(short_purpose="Original implementation of R2T2", - reference="Diego Alonso-Álvarez, et al." - "(2018, February 27). Solcore (Version 5.1.0). Zenodo." - "http://doi.org/10.5281/zenodo.1185316") +class MyGreatClass: + @add_reference( + short_purpose="Original implementation of R2T2", + reference="Diego Alonso-Álvarez, et al." + "(2018, February 27). Solcore (Version 5.1.0). Zenodo." + "http://doi.org/10.5281/zenodo.1185316", + ) def my_great_function(self): pass diff --git a/docs/examples/minimal.py b/docs/examples/minimal.py index f60fff8..cd08301 100644 --- a/docs/examples/minimal.py +++ b/docs/examples/minimal.py @@ -1,10 +1,12 @@ from r2t2 import add_reference -@add_reference(short_purpose="Original implementation of R2T2", - reference="Diego Alonso-Álvarez, et al." - "(2018, February 27). Solcore (Version 5.1.0). Zenodo." - "http://doi.org/10.5281/zenodo.1185316") +@add_reference( + short_purpose="Original implementation of R2T2", + reference="Diego Alonso-Álvarez, et al." + "(2018, February 27). Solcore (Version 5.1.0). Zenodo." + "http://doi.org/10.5281/zenodo.1185316", +) def my_great_function(): pass diff --git a/r2t2/__main__.py b/r2t2/__main__.py index c92f206..090fc06 100644 --- a/r2t2/__main__.py +++ b/r2t2/__main__.py @@ -10,7 +10,7 @@ from .writers import REGISTERED_WRITERS from .docstring_reference_parser import ( expand_file_list, - parse_and_add_docstring_references_from_files + parse_and_add_docstring_references_from_files, ) @@ -38,7 +38,7 @@ def add_common_arguments(parser: argparse.ArgumentParser): default="terminal", type=str, choices=sorted(REGISTERED_WRITERS.keys()), - help="Format of the output. Default: Terminal." + help="Format of the output. Default: Terminal.", ) parser.add_argument( "--encoding", @@ -53,11 +53,7 @@ def add_common_arguments(parser: argparse.ArgumentParser): help="File to save the references into. Ignored if format is 'Terminal'." " Default: [target folder]/references.", ) - parser.add_argument( - "--debug", - action="store_true", - help="Enable debug logging" - ) + parser.add_argument("--debug", action="store_true", help="Enable debug logging") class RunSubCommand(SubCommand): @@ -102,27 +98,24 @@ def add_arguments(self, parser: argparse.ArgumentParser): "target", default=".", type=str, - help="Target file or folder to analyse." - " Default: Current directory.", + help="Target file or folder to analyse." " Default: Current directory.", ) def run(self, args: argparse.Namespace): if args.notebook: - if not args.target.endswith('.ipynb'): - raise Exception("If --notebook flag is passed, target must be a" - " Jupyter notebook!") + if not args.target.endswith(".ipynb"): + raise Exception( + "If --notebook flag is passed, target must be a" + " Jupyter notebook!" + ) locate_references(args.target, encoding=args.encoding) if args.docstring or args.notebook: parse_and_add_docstring_references_from_files( - expand_file_list(args.target), - encoding=args.encoding + expand_file_list(args.target), encoding=args.encoding ) -SUB_COMMANDS: List[SubCommand] = [ - RunSubCommand(), - StaticSubCommand() -] +SUB_COMMANDS: List[SubCommand] = [RunSubCommand(), StaticSubCommand()] SUB_COMMAND_BY_NAME: Dict[str, SubCommand] = { sub_command.name: sub_command for sub_command in SUB_COMMANDS @@ -169,6 +162,6 @@ def main(argv: List[str] = None): run(args) -if __name__ == '__main__': - logging.basicConfig(level='INFO') +if __name__ == "__main__": + logging.basicConfig(level="INFO") main() diff --git a/r2t2/core.py b/r2t2/core.py index 25d6028..34b827b 100755 --- a/r2t2/core.py +++ b/r2t2/core.py @@ -117,14 +117,18 @@ def wrapper(wrapped, instance, args, kwargs): source = inspect.getsourcefile(wrapped) line = inspect.getsourcelines(wrapped)[1] identifier = f"{source}:{line}" - package = inspect.getmodule(inspect.stack()[1][0]).__name__.split(".")[0] + try: + package = inspect.getmodule(inspect.stack()[1][0]).__name__.split(".")[0] + except AttributeError: + package = "" if identifier in BIBLIOGRAPHY and ref in BIBLIOGRAPHY[identifier].references: return wrapped(*args, **kwargs) if identifier not in BIBLIOGRAPHY: - BIBLIOGRAPHY[identifier] = FunctionReference(wrapped.__name__, line, source, - package, [], []) + BIBLIOGRAPHY[identifier] = FunctionReference( + wrapped.__name__, line, source, package, [], [] + ) BIBLIOGRAPHY[identifier].short_purpose.append(short_purpose) BIBLIOGRAPHY[identifier].references.append(ref) diff --git a/r2t2/docstring_parser.py b/r2t2/docstring_parser.py index 323e3e5..14fcabb 100644 --- a/r2t2/docstring_parser.py +++ b/r2t2/docstring_parser.py @@ -13,7 +13,7 @@ """ -DEFAULT_ENCODING = 'utf-8' +DEFAULT_ENCODING = "utf-8" class CodeDocumentComment(NamedTuple): @@ -21,28 +21,31 @@ class CodeDocumentComment(NamedTuple): filename: Optional[str] = None lineno: Optional[int] = None name: Optional[str] = None + package: Optional[str] = None def iter_extract_docstring_from_text( - text: str, filename: str = None, + text: str, + filename: str = None, notebook: bool = False, ) -> Iterable[CodeDocumentComment]: - tree = ast.parse(text, filename=filename or '') + tree = ast.parse(text, filename=filename or "") for node in ast.walk(tree): - LOGGER.debug('node: %r', node) + LOGGER.debug("node: %r", node) try: node_docstring = ast.get_docstring(node) - LOGGER.debug('node_docstring: %r', node_docstring) + LOGGER.debug("node_docstring: %r", node_docstring) if node_docstring: if notebook: - lineno = 'n/a' + lineno = "n/a" else: - lineno = getattr(node, 'lineno', 1) + lineno = getattr(node, "lineno", 1) yield CodeDocumentComment( filename=filename, lineno=lineno, - name=getattr(node, 'name', None), - text=node_docstring + name=getattr(node, "name", None), + text=node_docstring, + package="", ) except TypeError: # node type may not be able to have docstrings @@ -50,14 +53,13 @@ def iter_extract_docstring_from_text( def iter_extract_docstring_from_lines( - lines: Iterable[str] + lines: Iterable[str], ) -> Iterable[CodeDocumentComment]: - return iter_extract_docstring_from_text('\n'.join(lines)) + return iter_extract_docstring_from_text("\n".join(lines)) def iter_extract_docstring_from_file( - path: Union[str, Path], - encoding: str = DEFAULT_ENCODING + path: Union[str, Path], encoding: str = DEFAULT_ENCODING ) -> Iterable[CodeDocumentComment]: path = Path(path) txt = path.read_text(encoding=encoding) @@ -72,13 +74,11 @@ def iter_extract_docstring_from_file( txt.append(FAKE_FUNC.format(i, " ".join(c["source"]))) txt = "\n".join(txt) notebook = True - return iter_extract_docstring_from_text(txt, filename=str(path), - notebook=notebook) + return iter_extract_docstring_from_text(txt, filename=str(path), notebook=notebook) def iter_extract_docstring_from_files( - paths: Iterable[Union[str, Path]], - **kwargs + paths: Iterable[Union[str, Path]], **kwargs ) -> Iterable[CodeDocumentComment]: for path in paths: yield from iter_extract_docstring_from_file(path, **kwargs) diff --git a/r2t2/docstring_reference_parser.py b/r2t2/docstring_reference_parser.py index 03dcc0b..321e39b 100644 --- a/r2t2/docstring_reference_parser.py +++ b/r2t2/docstring_reference_parser.py @@ -5,17 +5,14 @@ from r2t2.core import Biblio, BIBLIOGRAPHY, FunctionReference from r2t2.plain_text_parser import iter_parse_plain_text_references -from r2t2.docstring_parser import ( - CodeDocumentComment, - iter_extract_docstring_from_files -) +from r2t2.docstring_parser import CodeDocumentComment, iter_extract_docstring_from_files LOGGER = logging.getLogger(__name__) -DOCSTRING_SHORT_PURPOSE = 'automatically parsed from docstring' -NOTEBOOK_SHORT_PURPOSE = 'automatically parsed from markdown cell' +DOCSTRING_SHORT_PURPOSE = "automatically parsed from docstring" +NOTEBOOK_SHORT_PURPOSE = "automatically parsed from markdown cell" def expand_file_list(path: Union[Path, str]) -> List[Path]: @@ -34,25 +31,25 @@ def get_function_reference_identifier(function_reference: FunctionReference) -> def get_function_reference_from_docstring( - docstring: CodeDocumentComment + docstring: CodeDocumentComment, ) -> FunctionReference: references = list(iter_parse_plain_text_references(docstring.text)) - if docstring.lineno != 'n/a': + if docstring.lineno != "n/a": purpose = DOCSTRING_SHORT_PURPOSE else: purpose = NOTEBOOK_SHORT_PURPOSE return FunctionReference( - source=docstring.filename or '', + source=docstring.filename or "", line=docstring.lineno or 0, - name=docstring.name or '', + package=docstring.package or "", + name=docstring.name or "", references=references, - short_purpose=[purpose] * len(references) + short_purpose=[purpose] * len(references), ) def iter_parse_docstring_function_references_from_files( - filenames: Iterable[Union[str, Path]], - **kwargs + filenames: Iterable[Union[str, Path]], **kwargs ) -> Iterable[Tuple[str, FunctionReference]]: for docstring in iter_extract_docstring_from_files(filenames, **kwargs): function_reference = get_function_reference_from_docstring(docstring) @@ -62,9 +59,7 @@ def iter_parse_docstring_function_references_from_files( def parse_and_add_docstring_references_from_files( - filenames: Iterable[Union[str, Path]], - biblio: Biblio = None, - **kwargs + filenames: Iterable[Union[str, Path]], biblio: Biblio = None, **kwargs ): if biblio is None: biblio = BIBLIOGRAPHY diff --git a/r2t2/plain_text_parser.py b/r2t2/plain_text_parser.py index 348bf6d..9434335 100644 --- a/r2t2/plain_text_parser.py +++ b/r2t2/plain_text_parser.py @@ -3,26 +3,26 @@ from typing import Iterable, List -DOI_URL_HTTPS_PREFIX = 'https://doi.org/' +DOI_URL_HTTPS_PREFIX = "https://doi.org/" def iter_doi(text: str) -> Iterable[str]: - for m in re.findall(r'\b10\.\d{4,}/\S+', text): + for m in re.findall(r"\b10\.\d{4,}/\S+", text): yield DOI_URL_HTTPS_PREFIX + str(m) def iter_sphinx_reference_names(text: str) -> Iterable[str]: - for m in re.finditer(r':cite:`([^`]+)`', text): + for m in re.finditer(r":cite:`([^`]+)`", text): yield m.group(1) def iter_latex_reference_names(text: str) -> Iterable[str]: - for m in re.finditer(r'\\cite(?:\[[^\]]*\])?{([^}]+)}', text): + for m in re.finditer(r"\\cite(?:\[[^\]]*\])?{([^}]+)}", text): yield m.group(1) def iter_doxygen_reference_names(text: str) -> Iterable[str]: - for m in re.finditer(r'\\cite\s(\S+)', text): + for m in re.finditer(r"\\cite\s(\S+)", text): yield m.group(1) @@ -33,7 +33,7 @@ def iter_parse_plain_text_raw_bib_references(text: str) -> Iterable[str]: def iter_parse_plain_text_bib_references(text: str) -> Iterable[str]: for raw_reference in iter_parse_plain_text_raw_bib_references(text): - for ref_name in raw_reference.split(','): + for ref_name in raw_reference.split(","): yield ref_name.strip() diff --git a/r2t2/runtime_tracker.py b/r2t2/runtime_tracker.py index 34f6bf2..f8c80c7 100644 --- a/r2t2/runtime_tracker.py +++ b/r2t2/runtime_tracker.py @@ -17,8 +17,7 @@ def runtime_tracker(script: str, args: List[str], encoding: str): try: LOGGER.debug( - "loading script: %s (args: %s, encoding: %s)", - script, args, encoding + "loading script: %s (args: %s, encoding: %s)", script, args, encoding ) with open(script, encoding=encoding) as fp: code = compile(fp.read(), script, "exec") diff --git a/r2t2/static_parser.py b/r2t2/static_parser.py index 4071ea0..1b71e3b 100644 --- a/r2t2/static_parser.py +++ b/r2t2/static_parser.py @@ -15,7 +15,7 @@ class FileReferenceParseError(FileParseError): pass -DEFAULT_ENCODING = 'utf-8' +DEFAULT_ENCODING = "utf-8" def locate_references(path: Union[Path, str], encoding: str = DEFAULT_ENCODING): @@ -65,7 +65,7 @@ def locate_references_in_file(filename: Union[Path, str], encoding: str): raise except Exception as exc: raise FileParseError( - 'failed to process %s due to %r' % (filename, exc) + "failed to process %s due to %r" % (filename, exc) ) from exc @@ -81,7 +81,7 @@ def parse_references( try: name = re.findall(r"[\w']+", current)[1] - BIBLIOGRAPHY[identifier] = FunctionReference(name, line_num, source, [], []) + BIBLIOGRAPHY[identifier] = FunctionReference(name, line_num, source, "", [], []) def add_ref(i, j): one_ref = " ".join(ref_raw[i:j]).replace("@", "_") @@ -93,5 +93,5 @@ def add_ref(i, j): reduce(add_ref, ref_lines) except Exception as exc: raise FileReferenceParseError( - 'failed to process %s due to %r' % (identifier, exc) + "failed to process %s due to %r" % (identifier, exc) ) from exc diff --git a/tests/conftest.py b/tests/conftest.py index fbb1f90..7f0c3e7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -45,10 +45,10 @@ def a_great_function(): return a_great_function -@pytest.fixture(scope='session', autouse=True) +@pytest.fixture(scope="session", autouse=True) def setup_logging(): - for name in {'r2t2', 'tests'}: - logging.getLogger(name).setLevel('DEBUG') + for name in {"r2t2", "tests"}: + logging.getLogger(name).setLevel("DEBUG") @pytest.fixture() diff --git a/tests/test__main__.py b/tests/test__main__.py index 0e28594..ef76380 100644 --- a/tests/test__main__.py +++ b/tests/test__main__.py @@ -1,69 +1,36 @@ import pytest -from r2t2.__main__ import ( - parse_args, - main -) +from r2t2.__main__ import parse_args, main -VALID_FORMAT = 'markdown' -INVALID_FORMAT = 'other' +VALID_FORMAT = "markdown" +INVALID_FORMAT = "other" class TestParseArgs: def test_should_not_fail_on_valid_format(self): - args = parse_args([ - 'run', - '--format=%s' % VALID_FORMAT, - 'docs/examples' - ]) + args = parse_args(["run", "--format=%s" % VALID_FORMAT, "docs/examples"]) assert args.format == VALID_FORMAT def test_should_fail_on_invalid_format(self): with pytest.raises(SystemExit): - parse_args([ - 'run', - '--format=%s' % INVALID_FORMAT, - 'docs/examples' - ]) + parse_args(["run", "--format=%s" % INVALID_FORMAT, "docs/examples"]) -@pytest.mark.usefixtures('bibliography') +@pytest.mark.usefixtures("bibliography") class TestMain: - def test_should_not_fail_on_static_analysis_of_examples_including_docstrings( - self - ): - main([ - 'static', - '--docstring', - 'docs/examples' - ]) + def test_should_not_fail_on_static_analysis_of_examples_including_docstrings(self): + main(["static", "--docstring", "docs/examples"]) - def test_should_not_fail_on_static_analysis_of_examples_ignoring_docstrings( - self - ): - main([ - 'static', - 'docs/examples' - ]) + def test_should_not_fail_on_static_analysis_of_examples_ignoring_docstrings(self): + main(["static", "docs/examples"]) def test_should_not_fail_on_runtime_analysis_of_examples(self): - main([ - 'run', - 'docs/examples/minimal.py' - ]) + main(["run", "docs/examples/minimal.py"]) def test_should_not_fail_on_notebook(self): - main([ - 'static', - '--notebook', - 'tests/fixtures/notebook_doi.ipynb' - ]) + main(["static", "--notebook", "tests/fixtures/notebook_doi.ipynb"]) def test_should_fail_on_non_notebook(self): with pytest.raises(Exception): - main([ - 'static', - '--notebook', - 'docs/examples/docstring_doi.py' - ]) + main(["static", "--notebook", "docs/examples/docstring_doi.py"]) diff --git a/tests/test_core.py b/tests/test_core.py index 5733a3b..ad83aa3 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -86,7 +86,6 @@ def test_add_reference_from_doi(self, bib_with_tracking, decorated_with_doi): class TestAddSource: - def test_add_source_exception_if_not_bibtex(self, bibliography, tmp_path): source = tmp_path / "my_source" with raises(ValueError): diff --git a/tests/test_docstring_parser.py b/tests/test_docstring_parser.py index d3c0baf..9071e34 100644 --- a/tests/test_docstring_parser.py +++ b/tests/test_docstring_parser.py @@ -4,12 +4,12 @@ CodeDocumentComment, iter_extract_docstring_from_lines, iter_extract_docstring_from_file, - iter_extract_docstring_from_files + iter_extract_docstring_from_files, ) -DOC_STRING_LINE_1 = 'the docstring line 1' -DOC_STRING_LINE_2 = 'the docstring line 2' +DOC_STRING_LINE_1 = "the docstring line 1" +DOC_STRING_LINE_2 = "the docstring line 2" class TestIterExtractDocstringFromLines: @@ -17,70 +17,75 @@ def test_should_extract_no_docstrings_from_empty_file(self): assert list(iter_extract_docstring_from_lines([])) == [] def test_should_extract_no_docstrings_from_file_without_docstrings(self): - assert list(iter_extract_docstring_from_lines([ - 'def some_function():', - ' pass' - ])) == [] + assert ( + list( + iter_extract_docstring_from_lines(["def some_function():", " pass"]) + ) + == [] + ) def test_should_extract_module_level_docstring_using_double_quotes(self): - assert list(iter_extract_docstring_from_lines([ - '"""', - DOC_STRING_LINE_1, - DOC_STRING_LINE_2, - '"""' - ])) == [CodeDocumentComment( - name=None, - lineno=1, - text='\n'.join([ - DOC_STRING_LINE_1, - DOC_STRING_LINE_2 - ]) - )] + assert list( + iter_extract_docstring_from_lines( + ['"""', DOC_STRING_LINE_1, DOC_STRING_LINE_2, '"""'] + ) + ) == [ + CodeDocumentComment( + name=None, + lineno=1, + text="\n".join([DOC_STRING_LINE_1, DOC_STRING_LINE_2]), + package="", + ) + ] def test_should_extract_function_level_docstring_using_double_quotes(self): - assert list(iter_extract_docstring_from_lines([ - 'def some_function():', - ' """', - ' ' + DOC_STRING_LINE_1, - ' ' + DOC_STRING_LINE_2, - ' """', - ' pass' - ])) == [CodeDocumentComment( - name='some_function', - lineno=1, - text='\n'.join([ - DOC_STRING_LINE_1, - DOC_STRING_LINE_2 - ]) - )] + assert list( + iter_extract_docstring_from_lines( + [ + "def some_function():", + ' """', + " " + DOC_STRING_LINE_1, + " " + DOC_STRING_LINE_2, + ' """', + " pass", + ] + ) + ) == [ + CodeDocumentComment( + name="some_function", + lineno=1, + text="\n".join([DOC_STRING_LINE_1, DOC_STRING_LINE_2]), + package="", + ) + ] class TestIterExtractDocstringFromFile: def test_should_extract_module_level_docstring_using_double_quotes( self, temp_dir: Path ): - file_path = temp_dir / 'test.py' - file_path.write_text('\n'.join([ - '"""', - DOC_STRING_LINE_1, - DOC_STRING_LINE_2, - '"""' - ])) - expected_docstrings = [CodeDocumentComment( - filename=str(file_path), - name=None, - lineno=1, - text='\n'.join([ - DOC_STRING_LINE_1, - DOC_STRING_LINE_2 - ]) - )] - assert list(iter_extract_docstring_from_file( - str(file_path) - )) == expected_docstrings - assert list(iter_extract_docstring_from_file( - Path(file_path) - )) == expected_docstrings - assert list(iter_extract_docstring_from_files( - [str(file_path)] - )) == expected_docstrings + file_path = temp_dir / "test.py" + file_path.write_text( + "\n".join(['"""', DOC_STRING_LINE_1, DOC_STRING_LINE_2, '"""']) + ) + expected_docstrings = [ + CodeDocumentComment( + filename=str(file_path), + name=None, + lineno=1, + text="\n".join([DOC_STRING_LINE_1, DOC_STRING_LINE_2]), + package="", + ) + ] + assert ( + list(iter_extract_docstring_from_file(str(file_path))) + == expected_docstrings + ) + assert ( + list(iter_extract_docstring_from_file(Path(file_path))) + == expected_docstrings + ) + assert ( + list(iter_extract_docstring_from_files([str(file_path)])) + == expected_docstrings + ) diff --git a/tests/test_docstring_reference_parser.py b/tests/test_docstring_reference_parser.py index 3ac5028..e9df904 100644 --- a/tests/test_docstring_reference_parser.py +++ b/tests/test_docstring_reference_parser.py @@ -4,125 +4,94 @@ from r2t2.docstring_reference_parser import ( DOCSTRING_SHORT_PURPOSE, NOTEBOOK_SHORT_PURPOSE, - parse_and_add_docstring_references_from_files + parse_and_add_docstring_references_from_files, ) -DOI_URL_HTTPS_PREFIX = 'https://doi.org/' +DOI_URL_HTTPS_PREFIX = "https://doi.org/" HERE = Path(__file__).parent FIXTURES = HERE / "fixtures" -DOI_1 = '10.1234/zenodo.1234567' -DOI_2 = '10.5281/zenodo.1185316' -DOI_3 = '10.3435/zenodo.1579823' +DOI_1 = "10.1234/zenodo.1234567" +DOI_2 = "10.5281/zenodo.1185316" +DOI_3 = "10.3435/zenodo.1579823" class TestParseAndAddDocstringReferencesFromFiles: - def test_should_parse_docstring_reference( - self, temp_dir: Path - ): - file_path = temp_dir / 'test.py' - file_path.write_text('\n'.join([ - 'def some_function():' - ' """', - ' ' + DOI_1, - ' """' - ])) - biblio = Biblio() - parse_and_add_docstring_references_from_files( - [file_path], - biblio=biblio + def test_should_parse_docstring_reference(self, temp_dir: Path): + file_path = temp_dir / "test.py" + file_path.write_text( + "\n".join(["def some_function():" ' """', " " + DOI_1, ' """']) ) - expected_identifier = '{source}:{name}:{line_num}'.format( + biblio = Biblio() + parse_and_add_docstring_references_from_files([file_path], biblio=biblio) + expected_identifier = "{source}:{name}:{line_num}".format( source=str(file_path), - name='some_function', + name="some_function", line_num=1, ) assert biblio.keys() == {expected_identifier} function_reference = biblio[expected_identifier] - assert function_reference.name == 'some_function' + assert function_reference.name == "some_function" assert function_reference.source == str(file_path) assert function_reference.line == 1 assert function_reference.references == [DOI_URL_HTTPS_PREFIX + DOI_1] assert function_reference.short_purpose == [DOCSTRING_SHORT_PURPOSE] - def test_should_not_override_existing_reference( - self, temp_dir: Path - ): - file_path = temp_dir / 'test.py' - file_path.write_text('\n'.join([ - 'def some_function():' - ' """', - ' ' + DOI_1, - ' """' - ])) - expected_identifier = '{source}:{name}:{line_num}'.format( + def test_should_not_override_existing_reference(self, temp_dir: Path): + file_path = temp_dir / "test.py" + file_path.write_text( + "\n".join(["def some_function():" ' """', " " + DOI_1, ' """']) + ) + expected_identifier = "{source}:{name}:{line_num}".format( source=str(file_path), - name='some_function', + name="some_function", line_num=1, ) biblio = Biblio() existing_function_reference = FunctionReference( - name='other', - source='other.py', + name="other", + source="other.py", line=-1, - short_purpose=['For testing'], - references=['test/123'] + package=",", + short_purpose=["For testing"], + references=["test/123"], ) biblio[expected_identifier] = existing_function_reference - parse_and_add_docstring_references_from_files( - [file_path], - biblio=biblio - ) + parse_and_add_docstring_references_from_files([file_path], biblio=biblio) assert biblio == {expected_identifier: existing_function_reference} - def test_should_not_add_function_reference_without_references( - self, temp_dir: Path - ): - file_path = temp_dir / 'test.py' - file_path.write_text('\n'.join([ - 'def some_function():' - ' """', - ' some docstring', - ' """' - ])) - biblio = Biblio() - parse_and_add_docstring_references_from_files( - [file_path], - biblio=biblio + def test_should_not_add_function_reference_without_references(self, temp_dir: Path): + file_path = temp_dir / "test.py" + file_path.write_text( + "\n".join( + ["def some_function():" ' """', " some docstring", ' """'] + ) ) + biblio = Biblio() + parse_and_add_docstring_references_from_files([file_path], biblio=biblio) assert not biblio - def test_should_parse_notebook_references( - self - ): + def test_should_parse_notebook_references(self): file_path = FIXTURES / "notebook_doi.ipynb" biblio = Biblio() - parse_and_add_docstring_references_from_files( - [file_path], - biblio=biblio - ) - identifiers = '{source}:{name}:{line_num}' - names = ['cell_0', 'cell_4'] + parse_and_add_docstring_references_from_files([file_path], biblio=biblio) + identifiers = "{source}:{name}:{line_num}" + names = ["cell_0", "cell_4"] dois = [[DOI_2, DOI_3], [DOI_1]] expected_identifiers = [ - identifiers.format( - source=str(file_path), - name=names[0], - line_num='n/a' - ), - identifiers.format( - source=str(file_path), - name=names[1], - line_num='n/a' - ), + identifiers.format(source=str(file_path), name=names[0], line_num="n/a"), + identifiers.format(source=str(file_path), name=names[1], line_num="n/a"), ] assert biblio.keys() == set(expected_identifiers) for i, identifier in enumerate(expected_identifiers): function_reference = biblio[identifier] assert function_reference.name == names[i] assert function_reference.source == str(file_path) - assert function_reference.line == 'n/a' - assert function_reference.references == [DOI_URL_HTTPS_PREFIX + d - for d in dois[i]] - assert function_reference.short_purpose == ([NOTEBOOK_SHORT_PURPOSE] - * len(dois[i])) + assert function_reference.line == "n/a" + assert function_reference.package == "" + assert function_reference.references == [ + DOI_URL_HTTPS_PREFIX + d for d in dois[i] + ] + assert function_reference.short_purpose == ( + [NOTEBOOK_SHORT_PURPOSE] * len(dois[i]) + ) diff --git a/tests/test_plain_text_parser.py b/tests/test_plain_text_parser.py index 34c7354..5ab329f 100644 --- a/tests/test_plain_text_parser.py +++ b/tests/test_plain_text_parser.py @@ -1,70 +1,64 @@ -from r2t2.plain_text_parser import ( - parse_plain_text_references -) +from r2t2.plain_text_parser import parse_plain_text_references -DOI_URL_HTTP_PREFIX = 'http://doi.org/' -DOI_URL_HTTPS_PREFIX = 'https://doi.org/' +DOI_URL_HTTP_PREFIX = "http://doi.org/" +DOI_URL_HTTPS_PREFIX = "https://doi.org/" -DOI_1 = '10.1234/zenodo.1234567' +DOI_1 = "10.1234/zenodo.1234567" -REF_NAME_1 = 'Smith2001-1' -REF_NAME_2 = 'Johnson2002-2' +REF_NAME_1 = "Smith2001-1" +REF_NAME_2 = "Johnson2002-2" class TestParsePlainTextReferences: def test_should_return_empty_list_for_empty_string(self): - assert parse_plain_text_references('') == [] + assert parse_plain_text_references("") == [] def test_should_return_empty_list_for_text_without_reference(self): - assert parse_plain_text_references('description of some function') == [] + assert parse_plain_text_references("description of some function") == [] def test_should_parse_doi_without_additional_text(self): - assert parse_plain_text_references( - DOI_1 - ) == [DOI_URL_HTTPS_PREFIX + DOI_1] + assert parse_plain_text_references(DOI_1) == [DOI_URL_HTTPS_PREFIX + DOI_1] def test_should_parse_doi_with_surround_text(self): - assert parse_plain_text_references( - 'DOI: ' + DOI_1 + ' used for xyz' - ) == [DOI_URL_HTTPS_PREFIX + DOI_1] + assert parse_plain_text_references("DOI: " + DOI_1 + " used for xyz") == [ + DOI_URL_HTTPS_PREFIX + DOI_1 + ] def test_should_parse_doi_http_url(self): - assert parse_plain_text_references( - DOI_URL_HTTP_PREFIX + DOI_1 - ) == [DOI_URL_HTTPS_PREFIX + DOI_1] + assert parse_plain_text_references(DOI_URL_HTTP_PREFIX + DOI_1) == [ + DOI_URL_HTTPS_PREFIX + DOI_1 + ] def test_should_parse_doi_https_url(self): - assert parse_plain_text_references( - DOI_URL_HTTP_PREFIX + DOI_1 - ) == [DOI_URL_HTTPS_PREFIX + DOI_1] + assert parse_plain_text_references(DOI_URL_HTTP_PREFIX + DOI_1) == [ + DOI_URL_HTTPS_PREFIX + DOI_1 + ] def test_should_parse_single_sphinx_cite_reference(self): assert parse_plain_text_references( - r'See :cite:`' + REF_NAME_1 + r'` for an introduction' + r"See :cite:`" + REF_NAME_1 + r"` for an introduction" ) == [REF_NAME_1] def test_should_parse_multiple_sphinx_cite_references(self): assert parse_plain_text_references( - r'See :cite:`' + REF_NAME_1 + ', ' + REF_NAME_2 + r'` for an introduction' + r"See :cite:`" + REF_NAME_1 + ", " + REF_NAME_2 + r"` for an introduction" ) == [REF_NAME_1, REF_NAME_2] def test_should_parse_single_latex_cite_reference(self): - assert parse_plain_text_references( - r'\\cite{' + REF_NAME_1 + r'}' - ) == [REF_NAME_1] + assert parse_plain_text_references(r"\\cite{" + REF_NAME_1 + r"}") == [ + REF_NAME_1 + ] def test_should_parse_multiple_latex_cite_reference(self): assert parse_plain_text_references( - r'\\cite{' + REF_NAME_1 + ', ' + REF_NAME_2 + r'}' + r"\\cite{" + REF_NAME_1 + ", " + REF_NAME_2 + r"}" ) == [REF_NAME_1, REF_NAME_2] def test_should_parse_latex_cite_reference_with_page_number(self): - assert parse_plain_text_references( - r'\\cite[p.~215]{' + REF_NAME_1 + r'}' - ) == [REF_NAME_1] + assert parse_plain_text_references(r"\\cite[p.~215]{" + REF_NAME_1 + r"}") == [ + REF_NAME_1 + ] def test_should_parse_single_doxygen_cite_reference(self): - assert parse_plain_text_references( - r'\\cite ' + REF_NAME_1 - ) == [REF_NAME_1] + assert parse_plain_text_references(r"\\cite " + REF_NAME_1) == [REF_NAME_1] From 23bb1bc8151ef17e54e91f38e0478070182cd28e Mon Sep 17 00:00:00 2001 From: Diego Date: Thu, 3 Sep 2020 20:46:25 +0100 Subject: [PATCH 06/13] Add process references --- r2t2/core.py | 42 +++++++++++++++++++++++++++++++++++++++--- r2t2/doi2bib.py | 19 +++++++++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 r2t2/doi2bib.py diff --git a/r2t2/core.py b/r2t2/core.py index e36ab0b..5f94efa 100755 --- a/r2t2/core.py +++ b/r2t2/core.py @@ -3,9 +3,12 @@ from typing import NamedTuple, List, Optional, Callable, Dict, Union from functools import reduce from pathlib import Path +from warnings import warn import bibtexparser as bp +from .doi2bib import doi2bib + class FunctionReference(NamedTuple): name: str @@ -88,10 +91,43 @@ def load_source(self, package: str) -> None: with self._sources[package].open() as f: self._sources_loaded[package] = bp.load(f) - def save_source(self, package: str) -> None: - """Saves the (possibly modified) source for the given package.""" + def add_entry_to_source(self, entry: dict, package: str) -> None: + """Add entry to source and save it source for the given package.""" + self._sources_loaded[package].entries.append(entry) with self._sources[package].open() as f: - bp.dump(self._sources_loaded[package], f) + bp.dump(self._sources_loaded[package], f) + + def process_ref(self, ref: FunctionReference) -> str: + if ref.package not in self._sources_loaded: + self.load_source(ref.package) + + for refstr in ref.references: + if refstr.startswith("[plain]"): + return refstr.strip("[plain]") + + elif refstr.startswith("[bibkey]"): + return self._sources_loaded[ref.package].entries_dict[ + refstr.strip("[bibkey]") + ] + + elif refstr.startswith("[doi]"): + for entry in self._sources_loaded[ref.package].entries: + out = entry if entry.get("doi") == refstr.strip("[doi]") else None + if out: + db = bp.bibdatabase.BibDatabase() + db.entries = [out] + return bp.dumps(db) + + out = doi2bib(refstr.strip("[doi]")) + if out: + self.add_entry_to_source(bp.loads(out), ref.package) + return out + + warn( + f"Reference with doi={refstr.strip('[doi]')} not found!", + UserWarning, + ) + return "" BIBLIOGRAPHY: Biblio = Biblio() diff --git a/r2t2/doi2bib.py b/r2t2/doi2bib.py new file mode 100644 index 0000000..c48c1b4 --- /dev/null +++ b/r2t2/doi2bib.py @@ -0,0 +1,19 @@ +"""The following code has been adapted from: + +https://gist.github.com/jrsmith3/5513926 +""" + +import requests +from warnings import warn +from typing import Optional + + +def doi2bib(doi) -> Optional[str]: + """Return a bibTeX string of metadata for a given DOI.""" + url = doi if "doi.org" in doi else f"https://doi.org/{doi}" + headers = {"accept": "application/x-bibtex"} + r = requests.get(url, headers=headers) + if "DOI Not Found" in r.text: + warn(f"Reference with doi={doi} not found!", UserWarning) + return None + return r.text From cad960bdb027f6684e9a0058fc1079a90e793f5b Mon Sep 17 00:00:00 2001 From: Diego Date: Thu, 3 Sep 2020 20:55:34 +0100 Subject: [PATCH 07/13] Change output of process_ref to list --- r2t2/core.py | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/r2t2/core.py b/r2t2/core.py index 5f94efa..93c15d3 100755 --- a/r2t2/core.py +++ b/r2t2/core.py @@ -97,37 +97,45 @@ def add_entry_to_source(self, entry: dict, package: str) -> None: with self._sources[package].open() as f: bp.dump(self._sources_loaded[package], f) - def process_ref(self, ref: FunctionReference) -> str: + def process_ref(self, ref: FunctionReference) -> List[str]: + """Process the reference keys and retrieves the full information.""" if ref.package not in self._sources_loaded: self.load_source(ref.package) + processed = [] for refstr in ref.references: if refstr.startswith("[plain]"): - return refstr.strip("[plain]") + processed.append(refstr.strip("[plain]")) - elif refstr.startswith("[bibkey]"): - return self._sources_loaded[ref.package].entries_dict[ - refstr.strip("[bibkey]") - ] + elif refstr.startswith("[bibtex]"): + processed.append(self._sources_loaded[ref.package].entries_dict[ + refstr.strip("[bibtex]") + ]) elif refstr.startswith("[doi]"): + out = None for entry in self._sources_loaded[ref.package].entries: out = entry if entry.get("doi") == refstr.strip("[doi]") else None if out: db = bp.bibdatabase.BibDatabase() db.entries = [out] - return bp.dumps(db) + processed.append(bp.dumps(db)) + break - out = doi2bib(refstr.strip("[doi]")) if out: - self.add_entry_to_source(bp.loads(out), ref.package) - return out + continue + else: + out = doi2bib(refstr.strip("[doi]")) + if out: + self.add_entry_to_source(bp.loads(out), ref.package) + processed.append(out) + continue warn( f"Reference with doi={refstr.strip('[doi]')} not found!", UserWarning, ) - return "" + return [""] BIBLIOGRAPHY: Biblio = Biblio() From 7bc551b3bc8f877e9f6402829be5fc334a206043 Mon Sep 17 00:00:00 2001 From: Diego Date: Sun, 6 Sep 2020 16:11:04 +0100 Subject: [PATCH 08/13] Add register_process decorator --- r2t2/core.py | 131 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 96 insertions(+), 35 deletions(-) diff --git a/r2t2/core.py b/r2t2/core.py index 93c15d3..2b01edc 100755 --- a/r2t2/core.py +++ b/r2t2/core.py @@ -1,7 +1,7 @@ import inspect import wrapt from typing import NamedTuple, List, Optional, Callable, Dict, Union -from functools import reduce +from functools import reduce, partial from pathlib import Path from warnings import warn @@ -21,6 +21,26 @@ class FunctionReference(NamedTuple): class Biblio(dict): track_references: bool = False + _processes: Dict[str, Callable] = {} + + @classmethod + def register_process(cls, f: Optional[Callable] = None, name: Optional[str] = None): + """Register a function for processing references in the registry. + + Args: + f: Function to process references. + name (str): Name of the type of reference to process, eg. plain, doi... + + Returns: + The same input function + """ + if f is None: + return partial(cls.register_process, name=name) + + name = name if name else f.__name__ + + cls._processes[name] = f + return f def __init__(self): super().__init__() @@ -91,6 +111,12 @@ def load_source(self, package: str) -> None: with self._sources[package].open() as f: self._sources_loaded[package] = bp.load(f) + def get_source(self, package: str) -> bp.bibdatabase.BibDatabase: + """Provide the requested sources database.""" + if package not in self._sources_loaded: + self.load_source(package) + return self._sources_loaded[package] + def add_entry_to_source(self, entry: dict, package: str) -> None: """Add entry to source and save it source for the given package.""" self._sources_loaded[package].entries.append(entry) @@ -99,43 +125,14 @@ def add_entry_to_source(self, entry: dict, package: str) -> None: def process_ref(self, ref: FunctionReference) -> List[str]: """Process the reference keys and retrieves the full information.""" - if ref.package not in self._sources_loaded: - self.load_source(ref.package) + self.get_source(ref.package) processed = [] for refstr in ref.references: - if refstr.startswith("[plain]"): - processed.append(refstr.strip("[plain]")) - - elif refstr.startswith("[bibtex]"): - processed.append(self._sources_loaded[ref.package].entries_dict[ - refstr.strip("[bibtex]") - ]) - - elif refstr.startswith("[doi]"): - out = None - for entry in self._sources_loaded[ref.package].entries: - out = entry if entry.get("doi") == refstr.strip("[doi]") else None - if out: - db = bp.bibdatabase.BibDatabase() - db.entries = [out] - processed.append(bp.dumps(db)) - break - - if out: - continue - else: - out = doi2bib(refstr.strip("[doi]")) - if out: - self.add_entry_to_source(bp.loads(out), ref.package) - processed.append(out) - continue - - warn( - f"Reference with doi={refstr.strip('[doi]')} not found!", - UserWarning, - ) - return [""] + rtype, rstr = refstr.strip("[").split("]", 1) + processed.append(self._processes[rtype](rstr, ref.package)) + + return processed BIBLIOGRAPHY: Biblio = Biblio() @@ -194,3 +191,67 @@ def wrapper(wrapped, instance, args, kwargs): return wrapped(*args, **kwargs) return wrapper + + +@Biblio.register_process("plain") +def process_plain(ref: str, *args, **kwargs) -> str: + """ Process a plain string reference. Dummy function. + + Args: + ref (str): The input reference string + + Returns: + The same input string. + """ + return ref + + +@Biblio.register_process("bibtex") +def process_bibtex(ref: str, package: str, *args, **kwargs) -> str: + """ Process a bibtex key reference. + + Args: + ref (str): The bibtex key. + package (str): The package from where to get the reference from. + + Raises: + KeyError: If the reference source for that package does not contain the + requested key. + Returns: + A dictionary with the reference full information + """ + return BIBLIOGRAPHY.get_source(package).entries_dict[ref] + + +@Biblio.register_process("doi") +def process_doi(ref: str, package: str, *args, **kwargs) -> str: + """ Process a doi key reference. + + First, it will look for the reference in the database for the given package. If it + is not found there, it will retrieved it from the internet. If successful, the + reference will be added to the database, so future requests to access this reference + will be local. + + Args: + ref (str): The doi of the reference. + package (str): The package from where to get the reference from in the first + instance and where to save the reference after getting it from the internet. + + Returns: + A dictionary with the reference full information + """ + db = BIBLIOGRAPHY.get_source(package) + for entry in db.entries: + out = entry if entry.get("doi") == ref else None + if out: + return out + + out = doi2bib(ref) + if out: + BIBLIOGRAPHY.add_entry_to_source(bp.loads(out), package) + return db.entries[-1] + + warn( + f"Reference with doi={ref} not found!", UserWarning, + ) + return "" From 42a12e2f6c86919394a8b108113939b1731715bf Mon Sep 17 00:00:00 2001 From: Diego Date: Sun, 6 Sep 2020 16:17:10 +0100 Subject: [PATCH 09/13] Modify process_plain to return a dict --- r2t2/core.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/r2t2/core.py b/r2t2/core.py index 2b01edc..ddf5839 100755 --- a/r2t2/core.py +++ b/r2t2/core.py @@ -123,7 +123,7 @@ def add_entry_to_source(self, entry: dict, package: str) -> None: with self._sources[package].open() as f: bp.dump(self._sources_loaded[package], f) - def process_ref(self, ref: FunctionReference) -> List[str]: + def process_ref(self, ref: FunctionReference) -> List[Dict]: """Process the reference keys and retrieves the full information.""" self.get_source(ref.package) @@ -194,20 +194,21 @@ def wrapper(wrapped, instance, args, kwargs): @Biblio.register_process("plain") -def process_plain(ref: str, *args, **kwargs) -> str: +def process_plain(ref: str, *args, **kwargs) -> Dict: """ Process a plain string reference. Dummy function. Args: ref (str): The input reference string Returns: - The same input string. + A dictionary with the reference string as "title", a unique ID equal to the hash + of the reference string and an "ENTRYTYPE" equal to "misc". """ - return ref + return {"ID": hash(ref), "ENTRYTYPE": "misc", "title": ref} @Biblio.register_process("bibtex") -def process_bibtex(ref: str, package: str, *args, **kwargs) -> str: +def process_bibtex(ref: str, package: str, *args, **kwargs) -> Dict: """ Process a bibtex key reference. Args: @@ -224,7 +225,7 @@ def process_bibtex(ref: str, package: str, *args, **kwargs) -> str: @Biblio.register_process("doi") -def process_doi(ref: str, package: str, *args, **kwargs) -> str: +def process_doi(ref: str, package: str, *args, **kwargs) -> Dict: """ Process a doi key reference. First, it will look for the reference in the database for the given package. If it @@ -254,4 +255,4 @@ def process_doi(ref: str, package: str, *args, **kwargs) -> str: warn( f"Reference with doi={ref} not found!", UserWarning, ) - return "" + return {} From 93414fa5940eb52cda9ce81ef8ea002f3abba9d6 Mon Sep 17 00:00:00 2001 From: Diego Date: Sun, 6 Sep 2020 16:19:55 +0100 Subject: [PATCH 10/13] Change warning to error in process_doi --- r2t2/core.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/r2t2/core.py b/r2t2/core.py index ddf5839..52bd17d 100755 --- a/r2t2/core.py +++ b/r2t2/core.py @@ -218,6 +218,7 @@ def process_bibtex(ref: str, package: str, *args, **kwargs) -> Dict: Raises: KeyError: If the reference source for that package does not contain the requested key. + Returns: A dictionary with the reference full information """ @@ -236,7 +237,10 @@ def process_doi(ref: str, package: str, *args, **kwargs) -> Dict: Args: ref (str): The doi of the reference. package (str): The package from where to get the reference from in the first - instance and where to save the reference after getting it from the internet. + instance and where to save the reference after getting it online. + + Raises: + KeyError: If the reference is not found neither in the database nor online. Returns: A dictionary with the reference full information @@ -252,7 +256,4 @@ def process_doi(ref: str, package: str, *args, **kwargs) -> Dict: BIBLIOGRAPHY.add_entry_to_source(bp.loads(out), package) return db.entries[-1] - warn( - f"Reference with doi={ref} not found!", UserWarning, - ) - return {} + raise KeyError(f"Reference with doi={ref} not found in the database neither online") From d36d29f5a32bc4c6a98007f664883173a7faae63 Mon Sep 17 00:00:00 2001 From: Diego Date: Sun, 6 Sep 2020 16:23:00 +0100 Subject: [PATCH 11/13] Remove unused package import --- r2t2/core.py | 1 - 1 file changed, 1 deletion(-) diff --git a/r2t2/core.py b/r2t2/core.py index 52bd17d..6ef7413 100755 --- a/r2t2/core.py +++ b/r2t2/core.py @@ -3,7 +3,6 @@ from typing import NamedTuple, List, Optional, Callable, Dict, Union from functools import reduce, partial from pathlib import Path -from warnings import warn import bibtexparser as bp From a4a3cd74fbdc23fc82c8e9a8b288f0cf15e23c81 Mon Sep 17 00:00:00 2001 From: Diego Date: Sun, 6 Sep 2020 16:26:46 +0100 Subject: [PATCH 12/13] Add requests as dependency --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 58b7cdc..88178bc 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ author="Research Computing Service, Imperial College London", author_email="rcs-support@imperial.ac.uk", url="https://github.com/ImperialCollegeLondon/R2T2", - install_requires=["wrapt", "bibtexparser"], + install_requires=["wrapt", "bibtexparser", "requests"], tests_require=tests_require, extras_require={"dev": tests_require + precommit}, packages=find_packages("."), From 6611f2323490527f0eca7758e33c4c9d5398de6d Mon Sep 17 00:00:00 2001 From: Diego Date: Mon, 7 Sep 2020 05:38:00 +0100 Subject: [PATCH 13/13] Use keywords in register process --- r2t2/core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/r2t2/core.py b/r2t2/core.py index 6ef7413..1e69e30 100755 --- a/r2t2/core.py +++ b/r2t2/core.py @@ -192,7 +192,7 @@ def wrapper(wrapped, instance, args, kwargs): return wrapper -@Biblio.register_process("plain") +@Biblio.register_process(name="plain") def process_plain(ref: str, *args, **kwargs) -> Dict: """ Process a plain string reference. Dummy function. @@ -206,7 +206,7 @@ def process_plain(ref: str, *args, **kwargs) -> Dict: return {"ID": hash(ref), "ENTRYTYPE": "misc", "title": ref} -@Biblio.register_process("bibtex") +@Biblio.register_process(name="bibtex") def process_bibtex(ref: str, package: str, *args, **kwargs) -> Dict: """ Process a bibtex key reference. @@ -224,7 +224,7 @@ def process_bibtex(ref: str, package: str, *args, **kwargs) -> Dict: return BIBLIOGRAPHY.get_source(package).entries_dict[ref] -@Biblio.register_process("doi") +@Biblio.register_process(name="doi") def process_doi(ref: str, package: str, *args, **kwargs) -> Dict: """ Process a doi key reference.