diff --git a/docs/conf.py b/docs/conf.py index 2072d51..9c7335e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -17,12 +17,12 @@ # -- Project information ----------------------------------------------------- -project = 'Research References Tracking Tool (R2T2)' -copyright = '2020, Research Computing Service, Imperial College London' -author = 'Research Computing Service, Imperial College London' +project = "Research References Tracking Tool (R2T2)" +copyright = "2020, Research Computing Service, Imperial College London" +author = "Research Computing Service, Imperial College London" # The full version, including alpha/beta/rc tags -release = '0.3.1' +release = "0.3.1" # -- General configuration --------------------------------------------------- @@ -30,16 +30,15 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = [ -] +extensions = [] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The master toctree document. master_doc = "index" @@ -49,9 +48,9 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'alabaster' +html_theme = "alabaster" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] diff --git a/docs/examples/minimal-class.py b/docs/examples/minimal-class.py index 420c733..2761c96 100644 --- a/docs/examples/minimal-class.py +++ b/docs/examples/minimal-class.py @@ -1,9 +1,11 @@ from r2t2 import add_reference -@add_reference(short_purpose="Original implementation of R2T2", - reference="Diego Alonso-Álvarez, et al." - "(2018, February 27). Solcore (Version 5.1.0). Zenodo." - "http://doi.org/10.5281/zenodo.1185316") -class MyGreatClass(): +@add_reference( + short_purpose="Original implementation of R2T2", + reference="Diego Alonso-Álvarez, et al." + "(2018, February 27). Solcore (Version 5.1.0). Zenodo." + "http://doi.org/10.5281/zenodo.1185316", +) +class MyGreatClass: pass diff --git a/docs/examples/minimal-method.py b/docs/examples/minimal-method.py index 7198662..74c4704 100644 --- a/docs/examples/minimal-method.py +++ b/docs/examples/minimal-method.py @@ -1,11 +1,12 @@ from r2t2 import add_reference -class MyGreatClass(): - - @add_reference(short_purpose="Original implementation of R2T2", - reference="Diego Alonso-Álvarez, et al." - "(2018, February 27). Solcore (Version 5.1.0). Zenodo." - "http://doi.org/10.5281/zenodo.1185316") +class MyGreatClass: + @add_reference( + short_purpose="Original implementation of R2T2", + reference="Diego Alonso-Álvarez, et al." + "(2018, February 27). Solcore (Version 5.1.0). Zenodo." + "http://doi.org/10.5281/zenodo.1185316", + ) def my_great_function(self): pass diff --git a/docs/examples/minimal.py b/docs/examples/minimal.py index f60fff8..cd08301 100644 --- a/docs/examples/minimal.py +++ b/docs/examples/minimal.py @@ -1,10 +1,12 @@ from r2t2 import add_reference -@add_reference(short_purpose="Original implementation of R2T2", - reference="Diego Alonso-Álvarez, et al." - "(2018, February 27). Solcore (Version 5.1.0). Zenodo." - "http://doi.org/10.5281/zenodo.1185316") +@add_reference( + short_purpose="Original implementation of R2T2", + reference="Diego Alonso-Álvarez, et al." + "(2018, February 27). Solcore (Version 5.1.0). Zenodo." + "http://doi.org/10.5281/zenodo.1185316", +) def my_great_function(): pass diff --git a/r2t2/__main__.py b/r2t2/__main__.py index c92f206..090fc06 100644 --- a/r2t2/__main__.py +++ b/r2t2/__main__.py @@ -10,7 +10,7 @@ from .writers import REGISTERED_WRITERS from .docstring_reference_parser import ( expand_file_list, - parse_and_add_docstring_references_from_files + parse_and_add_docstring_references_from_files, ) @@ -38,7 +38,7 @@ def add_common_arguments(parser: argparse.ArgumentParser): default="terminal", type=str, choices=sorted(REGISTERED_WRITERS.keys()), - help="Format of the output. Default: Terminal." + help="Format of the output. Default: Terminal.", ) parser.add_argument( "--encoding", @@ -53,11 +53,7 @@ def add_common_arguments(parser: argparse.ArgumentParser): help="File to save the references into. Ignored if format is 'Terminal'." " Default: [target folder]/references.", ) - parser.add_argument( - "--debug", - action="store_true", - help="Enable debug logging" - ) + parser.add_argument("--debug", action="store_true", help="Enable debug logging") class RunSubCommand(SubCommand): @@ -102,27 +98,24 @@ def add_arguments(self, parser: argparse.ArgumentParser): "target", default=".", type=str, - help="Target file or folder to analyse." - " Default: Current directory.", + help="Target file or folder to analyse." " Default: Current directory.", ) def run(self, args: argparse.Namespace): if args.notebook: - if not args.target.endswith('.ipynb'): - raise Exception("If --notebook flag is passed, target must be a" - " Jupyter notebook!") + if not args.target.endswith(".ipynb"): + raise Exception( + "If --notebook flag is passed, target must be a" + " Jupyter notebook!" + ) locate_references(args.target, encoding=args.encoding) if args.docstring or args.notebook: parse_and_add_docstring_references_from_files( - expand_file_list(args.target), - encoding=args.encoding + expand_file_list(args.target), encoding=args.encoding ) -SUB_COMMANDS: List[SubCommand] = [ - RunSubCommand(), - StaticSubCommand() -] +SUB_COMMANDS: List[SubCommand] = [RunSubCommand(), StaticSubCommand()] SUB_COMMAND_BY_NAME: Dict[str, SubCommand] = { sub_command.name: sub_command for sub_command in SUB_COMMANDS @@ -169,6 +162,6 @@ def main(argv: List[str] = None): run(args) -if __name__ == '__main__': - logging.basicConfig(level='INFO') +if __name__ == "__main__": + logging.basicConfig(level="INFO") main() diff --git a/r2t2/core.py b/r2t2/core.py index 4ee162d..1e69e30 100755 --- a/r2t2/core.py +++ b/r2t2/core.py @@ -1,24 +1,50 @@ import inspect import wrapt from typing import NamedTuple, List, Optional, Callable, Dict, Union -from functools import reduce +from functools import reduce, partial from pathlib import Path +import bibtexparser as bp + +from .doi2bib import doi2bib + class FunctionReference(NamedTuple): name: str line: int source: str + package: str short_purpose: List[str] references: List[str] class Biblio(dict): track_references: bool = False + _processes: Dict[str, Callable] = {} + + @classmethod + def register_process(cls, f: Optional[Callable] = None, name: Optional[str] = None): + """Register a function for processing references in the registry. + + Args: + f: Function to process references. + name (str): Name of the type of reference to process, eg. plain, doi... + + Returns: + The same input function + """ + if f is None: + return partial(cls.register_process, name=name) + + name = name if name else f.__name__ + + cls._processes[name] = f + return f def __init__(self): super().__init__() self._sources: Dict[str, Path] = {} + self._sources_loaded: Dict[str, bp.bibdatabase.BibDatabase] = {} def __str__(self): def add_record(out, record): @@ -37,6 +63,7 @@ def add_record(out, record): def clear(self) -> None: super().clear() self._sources.clear() + self._sources_loaded.clear() @property def references(self): @@ -78,6 +105,34 @@ def add_source(self, source: Union[str, Path]) -> None: ) self._sources[package] = src + def load_source(self, package: str) -> None: + """Open the source for the given package.""" + with self._sources[package].open() as f: + self._sources_loaded[package] = bp.load(f) + + def get_source(self, package: str) -> bp.bibdatabase.BibDatabase: + """Provide the requested sources database.""" + if package not in self._sources_loaded: + self.load_source(package) + return self._sources_loaded[package] + + def add_entry_to_source(self, entry: dict, package: str) -> None: + """Add entry to source and save it source for the given package.""" + self._sources_loaded[package].entries.append(entry) + with self._sources[package].open() as f: + bp.dump(self._sources_loaded[package], f) + + def process_ref(self, ref: FunctionReference) -> List[Dict]: + """Process the reference keys and retrieves the full information.""" + self.get_source(ref.package) + + processed = [] + for refstr in ref.references: + rtype, rstr = refstr.strip("[").split("]", 1) + processed.append(self._processes[rtype](rstr, ref.package)) + + return processed + BIBLIOGRAPHY: Biblio = Biblio() @@ -116,13 +171,17 @@ def wrapper(wrapped, instance, args, kwargs): source = inspect.getsourcefile(wrapped) line = inspect.getsourcelines(wrapped)[1] identifier = f"{source}:{line}" + try: + package = inspect.getmodule(inspect.stack()[1][0]).__name__.split(".")[0] + except AttributeError: + package = "" if identifier in BIBLIOGRAPHY and ref in BIBLIOGRAPHY[identifier].references: return wrapped(*args, **kwargs) if identifier not in BIBLIOGRAPHY: BIBLIOGRAPHY[identifier] = FunctionReference( - wrapped.__name__, line, source, [], [] + wrapped.__name__, line, source, package, [], [] ) BIBLIOGRAPHY[identifier].short_purpose.append(short_purpose) @@ -131,3 +190,69 @@ def wrapper(wrapped, instance, args, kwargs): return wrapped(*args, **kwargs) return wrapper + + +@Biblio.register_process(name="plain") +def process_plain(ref: str, *args, **kwargs) -> Dict: + """ Process a plain string reference. Dummy function. + + Args: + ref (str): The input reference string + + Returns: + A dictionary with the reference string as "title", a unique ID equal to the hash + of the reference string and an "ENTRYTYPE" equal to "misc". + """ + return {"ID": hash(ref), "ENTRYTYPE": "misc", "title": ref} + + +@Biblio.register_process(name="bibtex") +def process_bibtex(ref: str, package: str, *args, **kwargs) -> Dict: + """ Process a bibtex key reference. + + Args: + ref (str): The bibtex key. + package (str): The package from where to get the reference from. + + Raises: + KeyError: If the reference source for that package does not contain the + requested key. + + Returns: + A dictionary with the reference full information + """ + return BIBLIOGRAPHY.get_source(package).entries_dict[ref] + + +@Biblio.register_process(name="doi") +def process_doi(ref: str, package: str, *args, **kwargs) -> Dict: + """ Process a doi key reference. + + First, it will look for the reference in the database for the given package. If it + is not found there, it will retrieved it from the internet. If successful, the + reference will be added to the database, so future requests to access this reference + will be local. + + Args: + ref (str): The doi of the reference. + package (str): The package from where to get the reference from in the first + instance and where to save the reference after getting it online. + + Raises: + KeyError: If the reference is not found neither in the database nor online. + + Returns: + A dictionary with the reference full information + """ + db = BIBLIOGRAPHY.get_source(package) + for entry in db.entries: + out = entry if entry.get("doi") == ref else None + if out: + return out + + out = doi2bib(ref) + if out: + BIBLIOGRAPHY.add_entry_to_source(bp.loads(out), package) + return db.entries[-1] + + raise KeyError(f"Reference with doi={ref} not found in the database neither online") diff --git a/r2t2/docstring_parser.py b/r2t2/docstring_parser.py index 323e3e5..14fcabb 100644 --- a/r2t2/docstring_parser.py +++ b/r2t2/docstring_parser.py @@ -13,7 +13,7 @@ """ -DEFAULT_ENCODING = 'utf-8' +DEFAULT_ENCODING = "utf-8" class CodeDocumentComment(NamedTuple): @@ -21,28 +21,31 @@ class CodeDocumentComment(NamedTuple): filename: Optional[str] = None lineno: Optional[int] = None name: Optional[str] = None + package: Optional[str] = None def iter_extract_docstring_from_text( - text: str, filename: str = None, + text: str, + filename: str = None, notebook: bool = False, ) -> Iterable[CodeDocumentComment]: - tree = ast.parse(text, filename=filename or '') + tree = ast.parse(text, filename=filename or "") for node in ast.walk(tree): - LOGGER.debug('node: %r', node) + LOGGER.debug("node: %r", node) try: node_docstring = ast.get_docstring(node) - LOGGER.debug('node_docstring: %r', node_docstring) + LOGGER.debug("node_docstring: %r", node_docstring) if node_docstring: if notebook: - lineno = 'n/a' + lineno = "n/a" else: - lineno = getattr(node, 'lineno', 1) + lineno = getattr(node, "lineno", 1) yield CodeDocumentComment( filename=filename, lineno=lineno, - name=getattr(node, 'name', None), - text=node_docstring + name=getattr(node, "name", None), + text=node_docstring, + package="", ) except TypeError: # node type may not be able to have docstrings @@ -50,14 +53,13 @@ def iter_extract_docstring_from_text( def iter_extract_docstring_from_lines( - lines: Iterable[str] + lines: Iterable[str], ) -> Iterable[CodeDocumentComment]: - return iter_extract_docstring_from_text('\n'.join(lines)) + return iter_extract_docstring_from_text("\n".join(lines)) def iter_extract_docstring_from_file( - path: Union[str, Path], - encoding: str = DEFAULT_ENCODING + path: Union[str, Path], encoding: str = DEFAULT_ENCODING ) -> Iterable[CodeDocumentComment]: path = Path(path) txt = path.read_text(encoding=encoding) @@ -72,13 +74,11 @@ def iter_extract_docstring_from_file( txt.append(FAKE_FUNC.format(i, " ".join(c["source"]))) txt = "\n".join(txt) notebook = True - return iter_extract_docstring_from_text(txt, filename=str(path), - notebook=notebook) + return iter_extract_docstring_from_text(txt, filename=str(path), notebook=notebook) def iter_extract_docstring_from_files( - paths: Iterable[Union[str, Path]], - **kwargs + paths: Iterable[Union[str, Path]], **kwargs ) -> Iterable[CodeDocumentComment]: for path in paths: yield from iter_extract_docstring_from_file(path, **kwargs) diff --git a/r2t2/docstring_reference_parser.py b/r2t2/docstring_reference_parser.py index 03dcc0b..321e39b 100644 --- a/r2t2/docstring_reference_parser.py +++ b/r2t2/docstring_reference_parser.py @@ -5,17 +5,14 @@ from r2t2.core import Biblio, BIBLIOGRAPHY, FunctionReference from r2t2.plain_text_parser import iter_parse_plain_text_references -from r2t2.docstring_parser import ( - CodeDocumentComment, - iter_extract_docstring_from_files -) +from r2t2.docstring_parser import CodeDocumentComment, iter_extract_docstring_from_files LOGGER = logging.getLogger(__name__) -DOCSTRING_SHORT_PURPOSE = 'automatically parsed from docstring' -NOTEBOOK_SHORT_PURPOSE = 'automatically parsed from markdown cell' +DOCSTRING_SHORT_PURPOSE = "automatically parsed from docstring" +NOTEBOOK_SHORT_PURPOSE = "automatically parsed from markdown cell" def expand_file_list(path: Union[Path, str]) -> List[Path]: @@ -34,25 +31,25 @@ def get_function_reference_identifier(function_reference: FunctionReference) -> def get_function_reference_from_docstring( - docstring: CodeDocumentComment + docstring: CodeDocumentComment, ) -> FunctionReference: references = list(iter_parse_plain_text_references(docstring.text)) - if docstring.lineno != 'n/a': + if docstring.lineno != "n/a": purpose = DOCSTRING_SHORT_PURPOSE else: purpose = NOTEBOOK_SHORT_PURPOSE return FunctionReference( - source=docstring.filename or '', + source=docstring.filename or "", line=docstring.lineno or 0, - name=docstring.name or '', + package=docstring.package or "", + name=docstring.name or "", references=references, - short_purpose=[purpose] * len(references) + short_purpose=[purpose] * len(references), ) def iter_parse_docstring_function_references_from_files( - filenames: Iterable[Union[str, Path]], - **kwargs + filenames: Iterable[Union[str, Path]], **kwargs ) -> Iterable[Tuple[str, FunctionReference]]: for docstring in iter_extract_docstring_from_files(filenames, **kwargs): function_reference = get_function_reference_from_docstring(docstring) @@ -62,9 +59,7 @@ def iter_parse_docstring_function_references_from_files( def parse_and_add_docstring_references_from_files( - filenames: Iterable[Union[str, Path]], - biblio: Biblio = None, - **kwargs + filenames: Iterable[Union[str, Path]], biblio: Biblio = None, **kwargs ): if biblio is None: biblio = BIBLIOGRAPHY diff --git a/r2t2/doi2bib.py b/r2t2/doi2bib.py new file mode 100644 index 0000000..c48c1b4 --- /dev/null +++ b/r2t2/doi2bib.py @@ -0,0 +1,19 @@ +"""The following code has been adapted from: + +https://gist.github.com/jrsmith3/5513926 +""" + +import requests +from warnings import warn +from typing import Optional + + +def doi2bib(doi) -> Optional[str]: + """Return a bibTeX string of metadata for a given DOI.""" + url = doi if "doi.org" in doi else f"https://doi.org/{doi}" + headers = {"accept": "application/x-bibtex"} + r = requests.get(url, headers=headers) + if "DOI Not Found" in r.text: + warn(f"Reference with doi={doi} not found!", UserWarning) + return None + return r.text diff --git a/r2t2/plain_text_parser.py b/r2t2/plain_text_parser.py index 348bf6d..9434335 100644 --- a/r2t2/plain_text_parser.py +++ b/r2t2/plain_text_parser.py @@ -3,26 +3,26 @@ from typing import Iterable, List -DOI_URL_HTTPS_PREFIX = 'https://doi.org/' +DOI_URL_HTTPS_PREFIX = "https://doi.org/" def iter_doi(text: str) -> Iterable[str]: - for m in re.findall(r'\b10\.\d{4,}/\S+', text): + for m in re.findall(r"\b10\.\d{4,}/\S+", text): yield DOI_URL_HTTPS_PREFIX + str(m) def iter_sphinx_reference_names(text: str) -> Iterable[str]: - for m in re.finditer(r':cite:`([^`]+)`', text): + for m in re.finditer(r":cite:`([^`]+)`", text): yield m.group(1) def iter_latex_reference_names(text: str) -> Iterable[str]: - for m in re.finditer(r'\\cite(?:\[[^\]]*\])?{([^}]+)}', text): + for m in re.finditer(r"\\cite(?:\[[^\]]*\])?{([^}]+)}", text): yield m.group(1) def iter_doxygen_reference_names(text: str) -> Iterable[str]: - for m in re.finditer(r'\\cite\s(\S+)', text): + for m in re.finditer(r"\\cite\s(\S+)", text): yield m.group(1) @@ -33,7 +33,7 @@ def iter_parse_plain_text_raw_bib_references(text: str) -> Iterable[str]: def iter_parse_plain_text_bib_references(text: str) -> Iterable[str]: for raw_reference in iter_parse_plain_text_raw_bib_references(text): - for ref_name in raw_reference.split(','): + for ref_name in raw_reference.split(","): yield ref_name.strip() diff --git a/r2t2/runtime_tracker.py b/r2t2/runtime_tracker.py index 34f6bf2..f8c80c7 100644 --- a/r2t2/runtime_tracker.py +++ b/r2t2/runtime_tracker.py @@ -17,8 +17,7 @@ def runtime_tracker(script: str, args: List[str], encoding: str): try: LOGGER.debug( - "loading script: %s (args: %s, encoding: %s)", - script, args, encoding + "loading script: %s (args: %s, encoding: %s)", script, args, encoding ) with open(script, encoding=encoding) as fp: code = compile(fp.read(), script, "exec") diff --git a/r2t2/static_parser.py b/r2t2/static_parser.py index 4071ea0..1b71e3b 100644 --- a/r2t2/static_parser.py +++ b/r2t2/static_parser.py @@ -15,7 +15,7 @@ class FileReferenceParseError(FileParseError): pass -DEFAULT_ENCODING = 'utf-8' +DEFAULT_ENCODING = "utf-8" def locate_references(path: Union[Path, str], encoding: str = DEFAULT_ENCODING): @@ -65,7 +65,7 @@ def locate_references_in_file(filename: Union[Path, str], encoding: str): raise except Exception as exc: raise FileParseError( - 'failed to process %s due to %r' % (filename, exc) + "failed to process %s due to %r" % (filename, exc) ) from exc @@ -81,7 +81,7 @@ def parse_references( try: name = re.findall(r"[\w']+", current)[1] - BIBLIOGRAPHY[identifier] = FunctionReference(name, line_num, source, [], []) + BIBLIOGRAPHY[identifier] = FunctionReference(name, line_num, source, "", [], []) def add_ref(i, j): one_ref = " ".join(ref_raw[i:j]).replace("@", "_") @@ -93,5 +93,5 @@ def add_ref(i, j): reduce(add_ref, ref_lines) except Exception as exc: raise FileReferenceParseError( - 'failed to process %s due to %r' % (identifier, exc) + "failed to process %s due to %r" % (identifier, exc) ) from exc diff --git a/setup.py b/setup.py index 3df903a..88178bc 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ author="Research Computing Service, Imperial College London", author_email="rcs-support@imperial.ac.uk", url="https://github.com/ImperialCollegeLondon/R2T2", - install_requires=["wrapt"], + install_requires=["wrapt", "bibtexparser", "requests"], tests_require=tests_require, extras_require={"dev": tests_require + precommit}, packages=find_packages("."), diff --git a/tests/conftest.py b/tests/conftest.py index fbb1f90..7f0c3e7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -45,10 +45,10 @@ def a_great_function(): return a_great_function -@pytest.fixture(scope='session', autouse=True) +@pytest.fixture(scope="session", autouse=True) def setup_logging(): - for name in {'r2t2', 'tests'}: - logging.getLogger(name).setLevel('DEBUG') + for name in {"r2t2", "tests"}: + logging.getLogger(name).setLevel("DEBUG") @pytest.fixture() diff --git a/tests/test__main__.py b/tests/test__main__.py index 0e28594..ef76380 100644 --- a/tests/test__main__.py +++ b/tests/test__main__.py @@ -1,69 +1,36 @@ import pytest -from r2t2.__main__ import ( - parse_args, - main -) +from r2t2.__main__ import parse_args, main -VALID_FORMAT = 'markdown' -INVALID_FORMAT = 'other' +VALID_FORMAT = "markdown" +INVALID_FORMAT = "other" class TestParseArgs: def test_should_not_fail_on_valid_format(self): - args = parse_args([ - 'run', - '--format=%s' % VALID_FORMAT, - 'docs/examples' - ]) + args = parse_args(["run", "--format=%s" % VALID_FORMAT, "docs/examples"]) assert args.format == VALID_FORMAT def test_should_fail_on_invalid_format(self): with pytest.raises(SystemExit): - parse_args([ - 'run', - '--format=%s' % INVALID_FORMAT, - 'docs/examples' - ]) + parse_args(["run", "--format=%s" % INVALID_FORMAT, "docs/examples"]) -@pytest.mark.usefixtures('bibliography') +@pytest.mark.usefixtures("bibliography") class TestMain: - def test_should_not_fail_on_static_analysis_of_examples_including_docstrings( - self - ): - main([ - 'static', - '--docstring', - 'docs/examples' - ]) + def test_should_not_fail_on_static_analysis_of_examples_including_docstrings(self): + main(["static", "--docstring", "docs/examples"]) - def test_should_not_fail_on_static_analysis_of_examples_ignoring_docstrings( - self - ): - main([ - 'static', - 'docs/examples' - ]) + def test_should_not_fail_on_static_analysis_of_examples_ignoring_docstrings(self): + main(["static", "docs/examples"]) def test_should_not_fail_on_runtime_analysis_of_examples(self): - main([ - 'run', - 'docs/examples/minimal.py' - ]) + main(["run", "docs/examples/minimal.py"]) def test_should_not_fail_on_notebook(self): - main([ - 'static', - '--notebook', - 'tests/fixtures/notebook_doi.ipynb' - ]) + main(["static", "--notebook", "tests/fixtures/notebook_doi.ipynb"]) def test_should_fail_on_non_notebook(self): with pytest.raises(Exception): - main([ - 'static', - '--notebook', - 'docs/examples/docstring_doi.py' - ]) + main(["static", "--notebook", "docs/examples/docstring_doi.py"]) diff --git a/tests/test_core.py b/tests/test_core.py index 5733a3b..5e98ab4 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -86,7 +86,6 @@ def test_add_reference_from_doi(self, bib_with_tracking, decorated_with_doi): class TestAddSource: - def test_add_source_exception_if_not_bibtex(self, bibliography, tmp_path): source = tmp_path / "my_source" with raises(ValueError): @@ -110,3 +109,29 @@ def test_add_source(self, bibliography, tmp_path): bibliography.add_source(source) assert "tests" in bibliography._sources assert bibliography._sources["tests"] == source + + +class TestLoadSource: + + def test_load_source(self, bibliography, tmp_path): + source = tmp_path / "my_source.bib" + ref = """@article{Sulzer_2019, + doi = {10.1149/2.0441908jes}, + url = {https://doi.org/10.1149%2F2.0441908jes}, + year = 2019, + publisher = {The Electrochemical Society}, + volume = {166}, + number = {12}, + pages = {A2372--A2382}, + author = {Valentin Sulzer et al.}, + title = {Faster Lead-Acid Battery Simulations from Porous-Electrode Theory}, + journal = {Journal of The Electrochemical Society}, +} +""" + with source.open("w") as f: + f.write(ref) + + bibliography.add_source(source) + bibliography.load_source("tests") + assert "tests" in bibliography._sources_loaded + assert "Sulzer_2019" in bibliography._sources_loaded["tests"].entries_dict diff --git a/tests/test_docstring_parser.py b/tests/test_docstring_parser.py index d3c0baf..9071e34 100644 --- a/tests/test_docstring_parser.py +++ b/tests/test_docstring_parser.py @@ -4,12 +4,12 @@ CodeDocumentComment, iter_extract_docstring_from_lines, iter_extract_docstring_from_file, - iter_extract_docstring_from_files + iter_extract_docstring_from_files, ) -DOC_STRING_LINE_1 = 'the docstring line 1' -DOC_STRING_LINE_2 = 'the docstring line 2' +DOC_STRING_LINE_1 = "the docstring line 1" +DOC_STRING_LINE_2 = "the docstring line 2" class TestIterExtractDocstringFromLines: @@ -17,70 +17,75 @@ def test_should_extract_no_docstrings_from_empty_file(self): assert list(iter_extract_docstring_from_lines([])) == [] def test_should_extract_no_docstrings_from_file_without_docstrings(self): - assert list(iter_extract_docstring_from_lines([ - 'def some_function():', - ' pass' - ])) == [] + assert ( + list( + iter_extract_docstring_from_lines(["def some_function():", " pass"]) + ) + == [] + ) def test_should_extract_module_level_docstring_using_double_quotes(self): - assert list(iter_extract_docstring_from_lines([ - '"""', - DOC_STRING_LINE_1, - DOC_STRING_LINE_2, - '"""' - ])) == [CodeDocumentComment( - name=None, - lineno=1, - text='\n'.join([ - DOC_STRING_LINE_1, - DOC_STRING_LINE_2 - ]) - )] + assert list( + iter_extract_docstring_from_lines( + ['"""', DOC_STRING_LINE_1, DOC_STRING_LINE_2, '"""'] + ) + ) == [ + CodeDocumentComment( + name=None, + lineno=1, + text="\n".join([DOC_STRING_LINE_1, DOC_STRING_LINE_2]), + package="", + ) + ] def test_should_extract_function_level_docstring_using_double_quotes(self): - assert list(iter_extract_docstring_from_lines([ - 'def some_function():', - ' """', - ' ' + DOC_STRING_LINE_1, - ' ' + DOC_STRING_LINE_2, - ' """', - ' pass' - ])) == [CodeDocumentComment( - name='some_function', - lineno=1, - text='\n'.join([ - DOC_STRING_LINE_1, - DOC_STRING_LINE_2 - ]) - )] + assert list( + iter_extract_docstring_from_lines( + [ + "def some_function():", + ' """', + " " + DOC_STRING_LINE_1, + " " + DOC_STRING_LINE_2, + ' """', + " pass", + ] + ) + ) == [ + CodeDocumentComment( + name="some_function", + lineno=1, + text="\n".join([DOC_STRING_LINE_1, DOC_STRING_LINE_2]), + package="", + ) + ] class TestIterExtractDocstringFromFile: def test_should_extract_module_level_docstring_using_double_quotes( self, temp_dir: Path ): - file_path = temp_dir / 'test.py' - file_path.write_text('\n'.join([ - '"""', - DOC_STRING_LINE_1, - DOC_STRING_LINE_2, - '"""' - ])) - expected_docstrings = [CodeDocumentComment( - filename=str(file_path), - name=None, - lineno=1, - text='\n'.join([ - DOC_STRING_LINE_1, - DOC_STRING_LINE_2 - ]) - )] - assert list(iter_extract_docstring_from_file( - str(file_path) - )) == expected_docstrings - assert list(iter_extract_docstring_from_file( - Path(file_path) - )) == expected_docstrings - assert list(iter_extract_docstring_from_files( - [str(file_path)] - )) == expected_docstrings + file_path = temp_dir / "test.py" + file_path.write_text( + "\n".join(['"""', DOC_STRING_LINE_1, DOC_STRING_LINE_2, '"""']) + ) + expected_docstrings = [ + CodeDocumentComment( + filename=str(file_path), + name=None, + lineno=1, + text="\n".join([DOC_STRING_LINE_1, DOC_STRING_LINE_2]), + package="", + ) + ] + assert ( + list(iter_extract_docstring_from_file(str(file_path))) + == expected_docstrings + ) + assert ( + list(iter_extract_docstring_from_file(Path(file_path))) + == expected_docstrings + ) + assert ( + list(iter_extract_docstring_from_files([str(file_path)])) + == expected_docstrings + ) diff --git a/tests/test_docstring_reference_parser.py b/tests/test_docstring_reference_parser.py index 3ac5028..e9df904 100644 --- a/tests/test_docstring_reference_parser.py +++ b/tests/test_docstring_reference_parser.py @@ -4,125 +4,94 @@ from r2t2.docstring_reference_parser import ( DOCSTRING_SHORT_PURPOSE, NOTEBOOK_SHORT_PURPOSE, - parse_and_add_docstring_references_from_files + parse_and_add_docstring_references_from_files, ) -DOI_URL_HTTPS_PREFIX = 'https://doi.org/' +DOI_URL_HTTPS_PREFIX = "https://doi.org/" HERE = Path(__file__).parent FIXTURES = HERE / "fixtures" -DOI_1 = '10.1234/zenodo.1234567' -DOI_2 = '10.5281/zenodo.1185316' -DOI_3 = '10.3435/zenodo.1579823' +DOI_1 = "10.1234/zenodo.1234567" +DOI_2 = "10.5281/zenodo.1185316" +DOI_3 = "10.3435/zenodo.1579823" class TestParseAndAddDocstringReferencesFromFiles: - def test_should_parse_docstring_reference( - self, temp_dir: Path - ): - file_path = temp_dir / 'test.py' - file_path.write_text('\n'.join([ - 'def some_function():' - ' """', - ' ' + DOI_1, - ' """' - ])) - biblio = Biblio() - parse_and_add_docstring_references_from_files( - [file_path], - biblio=biblio + def test_should_parse_docstring_reference(self, temp_dir: Path): + file_path = temp_dir / "test.py" + file_path.write_text( + "\n".join(["def some_function():" ' """', " " + DOI_1, ' """']) ) - expected_identifier = '{source}:{name}:{line_num}'.format( + biblio = Biblio() + parse_and_add_docstring_references_from_files([file_path], biblio=biblio) + expected_identifier = "{source}:{name}:{line_num}".format( source=str(file_path), - name='some_function', + name="some_function", line_num=1, ) assert biblio.keys() == {expected_identifier} function_reference = biblio[expected_identifier] - assert function_reference.name == 'some_function' + assert function_reference.name == "some_function" assert function_reference.source == str(file_path) assert function_reference.line == 1 assert function_reference.references == [DOI_URL_HTTPS_PREFIX + DOI_1] assert function_reference.short_purpose == [DOCSTRING_SHORT_PURPOSE] - def test_should_not_override_existing_reference( - self, temp_dir: Path - ): - file_path = temp_dir / 'test.py' - file_path.write_text('\n'.join([ - 'def some_function():' - ' """', - ' ' + DOI_1, - ' """' - ])) - expected_identifier = '{source}:{name}:{line_num}'.format( + def test_should_not_override_existing_reference(self, temp_dir: Path): + file_path = temp_dir / "test.py" + file_path.write_text( + "\n".join(["def some_function():" ' """', " " + DOI_1, ' """']) + ) + expected_identifier = "{source}:{name}:{line_num}".format( source=str(file_path), - name='some_function', + name="some_function", line_num=1, ) biblio = Biblio() existing_function_reference = FunctionReference( - name='other', - source='other.py', + name="other", + source="other.py", line=-1, - short_purpose=['For testing'], - references=['test/123'] + package=",", + short_purpose=["For testing"], + references=["test/123"], ) biblio[expected_identifier] = existing_function_reference - parse_and_add_docstring_references_from_files( - [file_path], - biblio=biblio - ) + parse_and_add_docstring_references_from_files([file_path], biblio=biblio) assert biblio == {expected_identifier: existing_function_reference} - def test_should_not_add_function_reference_without_references( - self, temp_dir: Path - ): - file_path = temp_dir / 'test.py' - file_path.write_text('\n'.join([ - 'def some_function():' - ' """', - ' some docstring', - ' """' - ])) - biblio = Biblio() - parse_and_add_docstring_references_from_files( - [file_path], - biblio=biblio + def test_should_not_add_function_reference_without_references(self, temp_dir: Path): + file_path = temp_dir / "test.py" + file_path.write_text( + "\n".join( + ["def some_function():" ' """', " some docstring", ' """'] + ) ) + biblio = Biblio() + parse_and_add_docstring_references_from_files([file_path], biblio=biblio) assert not biblio - def test_should_parse_notebook_references( - self - ): + def test_should_parse_notebook_references(self): file_path = FIXTURES / "notebook_doi.ipynb" biblio = Biblio() - parse_and_add_docstring_references_from_files( - [file_path], - biblio=biblio - ) - identifiers = '{source}:{name}:{line_num}' - names = ['cell_0', 'cell_4'] + parse_and_add_docstring_references_from_files([file_path], biblio=biblio) + identifiers = "{source}:{name}:{line_num}" + names = ["cell_0", "cell_4"] dois = [[DOI_2, DOI_3], [DOI_1]] expected_identifiers = [ - identifiers.format( - source=str(file_path), - name=names[0], - line_num='n/a' - ), - identifiers.format( - source=str(file_path), - name=names[1], - line_num='n/a' - ), + identifiers.format(source=str(file_path), name=names[0], line_num="n/a"), + identifiers.format(source=str(file_path), name=names[1], line_num="n/a"), ] assert biblio.keys() == set(expected_identifiers) for i, identifier in enumerate(expected_identifiers): function_reference = biblio[identifier] assert function_reference.name == names[i] assert function_reference.source == str(file_path) - assert function_reference.line == 'n/a' - assert function_reference.references == [DOI_URL_HTTPS_PREFIX + d - for d in dois[i]] - assert function_reference.short_purpose == ([NOTEBOOK_SHORT_PURPOSE] - * len(dois[i])) + assert function_reference.line == "n/a" + assert function_reference.package == "" + assert function_reference.references == [ + DOI_URL_HTTPS_PREFIX + d for d in dois[i] + ] + assert function_reference.short_purpose == ( + [NOTEBOOK_SHORT_PURPOSE] * len(dois[i]) + ) diff --git a/tests/test_plain_text_parser.py b/tests/test_plain_text_parser.py index 34c7354..5ab329f 100644 --- a/tests/test_plain_text_parser.py +++ b/tests/test_plain_text_parser.py @@ -1,70 +1,64 @@ -from r2t2.plain_text_parser import ( - parse_plain_text_references -) +from r2t2.plain_text_parser import parse_plain_text_references -DOI_URL_HTTP_PREFIX = 'http://doi.org/' -DOI_URL_HTTPS_PREFIX = 'https://doi.org/' +DOI_URL_HTTP_PREFIX = "http://doi.org/" +DOI_URL_HTTPS_PREFIX = "https://doi.org/" -DOI_1 = '10.1234/zenodo.1234567' +DOI_1 = "10.1234/zenodo.1234567" -REF_NAME_1 = 'Smith2001-1' -REF_NAME_2 = 'Johnson2002-2' +REF_NAME_1 = "Smith2001-1" +REF_NAME_2 = "Johnson2002-2" class TestParsePlainTextReferences: def test_should_return_empty_list_for_empty_string(self): - assert parse_plain_text_references('') == [] + assert parse_plain_text_references("") == [] def test_should_return_empty_list_for_text_without_reference(self): - assert parse_plain_text_references('description of some function') == [] + assert parse_plain_text_references("description of some function") == [] def test_should_parse_doi_without_additional_text(self): - assert parse_plain_text_references( - DOI_1 - ) == [DOI_URL_HTTPS_PREFIX + DOI_1] + assert parse_plain_text_references(DOI_1) == [DOI_URL_HTTPS_PREFIX + DOI_1] def test_should_parse_doi_with_surround_text(self): - assert parse_plain_text_references( - 'DOI: ' + DOI_1 + ' used for xyz' - ) == [DOI_URL_HTTPS_PREFIX + DOI_1] + assert parse_plain_text_references("DOI: " + DOI_1 + " used for xyz") == [ + DOI_URL_HTTPS_PREFIX + DOI_1 + ] def test_should_parse_doi_http_url(self): - assert parse_plain_text_references( - DOI_URL_HTTP_PREFIX + DOI_1 - ) == [DOI_URL_HTTPS_PREFIX + DOI_1] + assert parse_plain_text_references(DOI_URL_HTTP_PREFIX + DOI_1) == [ + DOI_URL_HTTPS_PREFIX + DOI_1 + ] def test_should_parse_doi_https_url(self): - assert parse_plain_text_references( - DOI_URL_HTTP_PREFIX + DOI_1 - ) == [DOI_URL_HTTPS_PREFIX + DOI_1] + assert parse_plain_text_references(DOI_URL_HTTP_PREFIX + DOI_1) == [ + DOI_URL_HTTPS_PREFIX + DOI_1 + ] def test_should_parse_single_sphinx_cite_reference(self): assert parse_plain_text_references( - r'See :cite:`' + REF_NAME_1 + r'` for an introduction' + r"See :cite:`" + REF_NAME_1 + r"` for an introduction" ) == [REF_NAME_1] def test_should_parse_multiple_sphinx_cite_references(self): assert parse_plain_text_references( - r'See :cite:`' + REF_NAME_1 + ', ' + REF_NAME_2 + r'` for an introduction' + r"See :cite:`" + REF_NAME_1 + ", " + REF_NAME_2 + r"` for an introduction" ) == [REF_NAME_1, REF_NAME_2] def test_should_parse_single_latex_cite_reference(self): - assert parse_plain_text_references( - r'\\cite{' + REF_NAME_1 + r'}' - ) == [REF_NAME_1] + assert parse_plain_text_references(r"\\cite{" + REF_NAME_1 + r"}") == [ + REF_NAME_1 + ] def test_should_parse_multiple_latex_cite_reference(self): assert parse_plain_text_references( - r'\\cite{' + REF_NAME_1 + ', ' + REF_NAME_2 + r'}' + r"\\cite{" + REF_NAME_1 + ", " + REF_NAME_2 + r"}" ) == [REF_NAME_1, REF_NAME_2] def test_should_parse_latex_cite_reference_with_page_number(self): - assert parse_plain_text_references( - r'\\cite[p.~215]{' + REF_NAME_1 + r'}' - ) == [REF_NAME_1] + assert parse_plain_text_references(r"\\cite[p.~215]{" + REF_NAME_1 + r"}") == [ + REF_NAME_1 + ] def test_should_parse_single_doxygen_cite_reference(self): - assert parse_plain_text_references( - r'\\cite ' + REF_NAME_1 - ) == [REF_NAME_1] + assert parse_plain_text_references(r"\\cite " + REF_NAME_1) == [REF_NAME_1]