Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Process references #73

Draft
wants to merge 16 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 9 additions & 10 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,29 +17,28 @@

# -- Project information -----------------------------------------------------

project = 'Research References Tracking Tool (R2T2)'
copyright = '2020, Research Computing Service, Imperial College London'
author = 'Research Computing Service, Imperial College London'
project = "Research References Tracking Tool (R2T2)"
copyright = "2020, Research Computing Service, Imperial College London"
author = "Research Computing Service, Imperial College London"

# The full version, including alpha/beta/rc tags
release = '0.3.1'
release = "0.3.1"


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
]
extensions = []

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
templates_path = ["_templates"]

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

# The master toctree document.
master_doc = "index"
Expand All @@ -49,9 +48,9 @@
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'
html_theme = "alabaster"

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_static_path = ["_static"]
12 changes: 7 additions & 5 deletions docs/examples/minimal-class.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from r2t2 import add_reference


@add_reference(short_purpose="Original implementation of R2T2",
reference="Diego Alonso-Álvarez, et al."
"(2018, February 27). Solcore (Version 5.1.0). Zenodo."
"http://doi.org/10.5281/zenodo.1185316")
class MyGreatClass():
@add_reference(
    short_purpose="Original implementation of R2T2",
    # Adjacent string literals are joined verbatim, so every fragment but the
    # last must carry its own trailing space.
    reference="Diego Alonso-Álvarez, et al. "
    "(2018, February 27). Solcore (Version 5.1.0). Zenodo. "
    "http://doi.org/10.5281/zenodo.1185316",
)
class MyGreatClass:
    """Minimal example of attaching a reference to a class."""

    pass
13 changes: 7 additions & 6 deletions docs/examples/minimal-method.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from r2t2 import add_reference


class MyGreatClass():

@add_reference(short_purpose="Original implementation of R2T2",
reference="Diego Alonso-Álvarez, et al."
"(2018, February 27). Solcore (Version 5.1.0). Zenodo."
"http://doi.org/10.5281/zenodo.1185316")
class MyGreatClass:
    """Minimal example of attaching a reference to a method."""

    @add_reference(
        short_purpose="Original implementation of R2T2",
        # Adjacent string literals are joined verbatim, so every fragment but
        # the last must carry its own trailing space.
        reference="Diego Alonso-Álvarez, et al. "
        "(2018, February 27). Solcore (Version 5.1.0). Zenodo. "
        "http://doi.org/10.5281/zenodo.1185316",
    )
    def my_great_function(self):
        """Do nothing; the method only exists to carry the reference."""
        pass
10 changes: 6 additions & 4 deletions docs/examples/minimal.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from r2t2 import add_reference


@add_reference(short_purpose="Original implementation of R2T2",
reference="Diego Alonso-Álvarez, et al."
"(2018, February 27). Solcore (Version 5.1.0). Zenodo."
"http://doi.org/10.5281/zenodo.1185316")
@add_reference(
    short_purpose="Original implementation of R2T2",
    # Adjacent string literals are joined verbatim, so every fragment but the
    # last must carry its own trailing space.
    reference="Diego Alonso-Álvarez, et al. "
    "(2018, February 27). Solcore (Version 5.1.0). Zenodo. "
    "http://doi.org/10.5281/zenodo.1185316",
)
def my_great_function():
    """Do nothing; the function only exists to carry the reference."""
    pass

Expand Down
33 changes: 13 additions & 20 deletions r2t2/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from .writers import REGISTERED_WRITERS
from .docstring_reference_parser import (
expand_file_list,
parse_and_add_docstring_references_from_files
parse_and_add_docstring_references_from_files,
)


Expand Down Expand Up @@ -38,7 +38,7 @@ def add_common_arguments(parser: argparse.ArgumentParser):
default="terminal",
type=str,
choices=sorted(REGISTERED_WRITERS.keys()),
help="Format of the output. Default: Terminal."
help="Format of the output. Default: Terminal.",
)
parser.add_argument(
"--encoding",
Expand All @@ -53,11 +53,7 @@ def add_common_arguments(parser: argparse.ArgumentParser):
help="File to save the references into. Ignored if format is 'Terminal'."
" Default: [target folder]/references.",
)
parser.add_argument(
"--debug",
action="store_true",
help="Enable debug logging"
)
parser.add_argument("--debug", action="store_true", help="Enable debug logging")


class RunSubCommand(SubCommand):
Expand Down Expand Up @@ -102,27 +98,24 @@ def add_arguments(self, parser: argparse.ArgumentParser):
"target",
default=".",
type=str,
help="Target file or folder to analyse."
" Default: Current directory.",
help="Target file or folder to analyse." " Default: Current directory.",
)

def run(self, args: argparse.Namespace):
if args.notebook:
if not args.target.endswith('.ipynb'):
raise Exception("If --notebook flag is passed, target must be a"
" Jupyter notebook!")
if not args.target.endswith(".ipynb"):
raise Exception(
"If --notebook flag is passed, target must be a"
" Jupyter notebook!"
)
locate_references(args.target, encoding=args.encoding)
if args.docstring or args.notebook:
parse_and_add_docstring_references_from_files(
expand_file_list(args.target),
encoding=args.encoding
expand_file_list(args.target), encoding=args.encoding
)


SUB_COMMANDS: List[SubCommand] = [
RunSubCommand(),
StaticSubCommand()
]
SUB_COMMANDS: List[SubCommand] = [RunSubCommand(), StaticSubCommand()]

SUB_COMMAND_BY_NAME: Dict[str, SubCommand] = {
sub_command.name: sub_command for sub_command in SUB_COMMANDS
Expand Down Expand Up @@ -169,6 +162,6 @@ def main(argv: List[str] = None):
run(args)


if __name__ == '__main__':
logging.basicConfig(level='INFO')
if __name__ == "__main__":
logging.basicConfig(level="INFO")
main()
129 changes: 127 additions & 2 deletions r2t2/core.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,50 @@
import inspect
import wrapt
from typing import NamedTuple, List, Optional, Callable, Dict, Union
from functools import reduce
from functools import reduce, partial
from pathlib import Path

import bibtexparser as bp

from .doi2bib import doi2bib


class FunctionReference(NamedTuple):
    """Record of the references attached to one decorated callable.

    Instances are stored in the bibliography under a "<source>:<line>" identifier.
    The two list fields are mutable and are appended to after the record is created.
    """

    # Name of the decorated callable (its ``__name__``).
    name: str
    # Line number reported by ``inspect.getsourcelines`` for the callable.
    line: int
    # Source file reported by ``inspect.getsourcefile`` for the callable.
    source: str
    # Top-level package name of the calling module, or "" when it cannot be resolved.
    package: str
    # Short descriptions of what each reference was used for.
    short_purpose: List[str]
    # Reference strings; ``Biblio.process_ref`` splits each one on the first "]"
    # to obtain the reference type and the reference itself.
    references: List[str]


class Biblio(dict):
track_references: bool = False
_processes: Dict[str, Callable] = {}

@classmethod
def register_process(cls, f: Optional[Callable] = None, name: Optional[str] = None):
    """Register a function that processes one type of reference.

    Can be used directly (``register_process(func)``) or as a decorator factory
    (``@register_process(name="doi")``).

    Args:
        f: Function to process references. When omitted, a decorator awaiting the
            function is returned instead.
        name (str): Name of the type of reference to process, eg. plain, doi...
            Defaults to the name of the function.

    Returns:
        The same input function, or a decorator bound to ``name`` if ``f`` is None.
    """
    if f is not None:
        key = name or f.__name__
        cls._processes[key] = f
        return f

    # Only keyword arguments were given: hand back a decorator remembering the name.
    return partial(cls.register_process, name=name)

def __init__(self):
    """Create an empty bibliography with no registered and no loaded sources."""
    super().__init__()
    # Path of the reference source file of each package, keyed by package name.
    self._sources: Dict[str, Path] = {}
    # Parsed source databases keyed by package name; filled in by ``load_source``.
    self._sources_loaded: Dict[str, bp.bibdatabase.BibDatabase] = {}

def __str__(self):
def add_record(out, record):
Expand All @@ -37,6 +63,7 @@ def add_record(out, record):
def clear(self) -> None:
    """Remove all references, registered sources and loaded source databases."""
    super().clear()
    for registry in (self._sources, self._sources_loaded):
        registry.clear()

@property
def references(self):
Expand Down Expand Up @@ -78,6 +105,34 @@ def add_source(self, source: Union[str, Path]) -> None:
)
self._sources[package] = src

def load_source(self, package: str) -> None:
    """Parse the source file registered for a package and keep the result.

    Args:
        package (str): Name of the package whose source is to be loaded.

    Raises:
        KeyError: If no source has been registered for the package.
    """
    source_path = self._sources[package]
    with source_path.open() as handle:
        database = bp.load(handle)
    self._sources_loaded[package] = database

def get_source(self, package: str) -> bp.bibdatabase.BibDatabase:
    """Return the source database of a package, loading it on first use.

    Args:
        package (str): Name of the package whose source database is requested.

    Returns:
        The parsed database held for that package.
    """
    if package in self._sources_loaded:
        return self._sources_loaded[package]

    # Not loaded yet: parse the file now and serve it from memory afterwards.
    self.load_source(package)
    return self._sources_loaded[package]

def add_entry_to_source(self, entry: dict, package: str) -> None:
    """Add an entry to the source of the given package and save it to disk.

    Args:
        entry (dict): The entry to append to the source database.
        package (str): Name of the package whose source is to be updated.

    Raises:
        KeyError: If no source has been registered for the package.
    """
    # Go through get_source so the database is loaded if it was not already.
    database = self.get_source(package)
    database.entries.append(entry)

    # Serialise before opening the file: opening in "w" mode truncates it, so a
    # failure while writing entries must not happen with the file already empty.
    serialised = bp.dumps(database)
    with self._sources[package].open("w") as f:
        f.write(serialised)

def process_ref(self, ref: FunctionReference) -> List[Dict]:
    """Process the reference keys and retrieve their full information.

    Each reference string is expected as "[type]reference", eg. "[doi]10.1000/x".
    Strings that do not start with a "[type]" tag are treated as "plain".

    Args:
        ref (FunctionReference): Record whose ``references`` are to be processed.

    Raises:
        KeyError: If the package has no registered source or if a reference uses
            a type for which no process has been registered.

    Returns:
        A list with the processed information of each reference, in order.
    """
    # Load the package source up front so a missing source fails early.
    self.get_source(ref.package)

    processed = []
    for refstr in ref.references:
        if refstr.startswith("[") and "]" in refstr:
            # Drop only the opening bracket; the text after "]" is kept verbatim.
            rtype, rstr = refstr[1:].split("]", 1)
        else:
            rtype, rstr = "plain", refstr

        try:
            process = self._processes[rtype]
        except KeyError:
            raise KeyError(
                f"No process registered for reference type '{rtype}' in '{refstr}'"
            ) from None
        processed.append(process(rstr, ref.package))

    return processed


BIBLIOGRAPHY: Biblio = Biblio()

Expand Down Expand Up @@ -116,13 +171,17 @@ def wrapper(wrapped, instance, args, kwargs):
source = inspect.getsourcefile(wrapped)
line = inspect.getsourcelines(wrapped)[1]
identifier = f"{source}:{line}"
try:
package = inspect.getmodule(inspect.stack()[1][0]).__name__.split(".")[0]
except AttributeError:
package = ""

if identifier in BIBLIOGRAPHY and ref in BIBLIOGRAPHY[identifier].references:
return wrapped(*args, **kwargs)

if identifier not in BIBLIOGRAPHY:
BIBLIOGRAPHY[identifier] = FunctionReference(
wrapped.__name__, line, source, [], []
wrapped.__name__, line, source, package, [], []
)

BIBLIOGRAPHY[identifier].short_purpose.append(short_purpose)
Expand All @@ -131,3 +190,69 @@ def wrapper(wrapped, instance, args, kwargs):
return wrapped(*args, **kwargs)

return wrapper


@Biblio.register_process(name="plain")
def process_plain(ref: str, *args, **kwargs) -> Dict:
    """Process a plain string reference. Dummy function.

    Args:
        ref (str): The input reference string

    Returns:
        A dictionary with the reference string as "title", an "ID" equal to the
        SHA-1 hex digest of the reference string and an "ENTRYTYPE" equal to "misc".
    """
    import hashlib

    # The built-in hash() is salted per process and returns an int, so it gives
    # neither a reproducible ID nor the string ID a BibTeX entry needs.
    digest = hashlib.sha1(ref.encode("utf-8")).hexdigest()
    return {"ID": digest, "ENTRYTYPE": "misc", "title": ref}


@Biblio.register_process(name="bibtex")
def process_bibtex(ref: str, package: str, *args, **kwargs) -> Dict:
    """Look up a bibtex key in the source database of a package.

    Args:
        ref (str): The bibtex key.
        package (str): The package whose source holds the reference.

    Raises:
        KeyError: If the reference source for that package does not contain the
            requested key.

    Returns:
        A dictionary with the full information of the reference.
    """
    database = BIBLIOGRAPHY.get_source(package)
    entries_by_key = database.entries_dict
    return entries_by_key[ref]


@Biblio.register_process(name="doi")
def process_doi(ref: str, package: str, *args, **kwargs) -> Dict:
    """Process a doi key reference.

    First, it will look for the reference in the database for the given package. If
    it is not found there, it will be retrieved from the internet. If successful,
    the reference will be added to the database, so future requests to access this
    reference will be local.

    Args:
        ref (str): The doi of the reference.
        package (str): The package from where to get the reference from in the first
            instance and where to save the reference after getting it online.

    Raises:
        KeyError: If the reference is found neither in the database nor online.

    Returns:
        A dictionary with the reference full information
    """
    db = BIBLIOGRAPHY.get_source(package)
    for entry in db.entries:
        if entry.get("doi") == ref:
            return entry

    out = doi2bib(ref)
    # bp.loads returns a whole database; only its entries belong in the source.
    fetched = bp.loads(out).entries if out else []
    if fetched:
        entry = fetched[0]
        # The local lookup above matches on "doi", so make sure it is present.
        entry.setdefault("doi", ref)
        BIBLIOGRAPHY.add_entry_to_source(entry, package)
        return entry

    raise KeyError(f"Reference with doi={ref} not found in the database neither online")
Loading