diff --git a/.github/actions/install/action.yml b/.github/actions/install/action.yml index 349a06d1a..10f48bcb1 100644 --- a/.github/actions/install/action.yml +++ b/.github/actions/install/action.yml @@ -18,7 +18,7 @@ inputs: description: "The requirements file depends on the hardware, i.e., CPU, GPU, or macOS" required: true default: "cpu" - options: ["cpu", "gpu", "macos"] + options: ["cpu", "cu118", "cu121", "macos"] runs: using: "composite" diff --git a/.gitignore b/.gitignore index 8d75ef473..d8d702060 100644 --- a/.gitignore +++ b/.gitignore @@ -130,9 +130,10 @@ dmypy.json # Documentation docs/build/html -docs/**/*.rst +docs/source/api/*.rst docs/source/*.md + # Badges assets/badges diff --git a/docs/parse_about.py b/docs/parse_about.py deleted file mode 100644 index 94e09b7d4..000000000 --- a/docs/parse_about.py +++ /dev/null @@ -1,110 +0,0 @@ -"""Script to parse paper.md file to About section for website.""" - -import argparse -import re - - -def main(input_file: str, output_file: str) -> None: - """Parsing script.""" - # Check(s) - assert input_file != output_file - assert input_file.endswith(".md") - assert output_file.endswith(".md") - - # Read input file contents - with open(input_file, "r") as f: - content = f.read() - - # Get all section headers and constituent paragraphs - sections = list(re.finditer("^# (.*?)\n", content, re.M)) - corpus = {} - for s1, s2 in zip(sections[:-1], sections[1:]): - corpus[s1.group(1)] = [ - par for par in content[s1.end() : s2.start()].split("\n") if par - ] - - # Stitch together an About section - content = "\n\n".join( - [ - "# About", - corpus["Summary"][1], - "## Impact", - ] - + corpus["Impact on physics"] - + [ - "## Usage", - ] - + corpus["Usage"][:1] - + ["### FIGURE ###"] - + corpus["Usage"][2:] - + [ - "## Acknowledgements", - "### EU EMBLEM ###", - ] - + corpus["Acknowledgements"] - ) - - # Add figure - figure = corpus["Usage"][1] - m = re.search( - r"!\[(?P.*) *\\label\{.*\} *\]\((?P.*)\)", figure, re.M - ) 
- caption, path = m.group("caption"), m.group("path") - content = content.replace( - "### FIGURE ###", - f""" -:::{{figure-md}} flowchart -:class: figclass - -flowchart - -{caption} -:::""", - ) - - # Remove references - pattern = "\[[@\:\w]+\]" - references = re.findall(pattern, content) - for reference in references: - content = content.replace(f" {reference}", "") - - # Update figure reference - content = content.replace( - "\\autoref{fig:flowchart}", "[the Figure](flowchart)" - ) - - # Update acknowledgements to include EU emblem. - content = content.replace( - "Andreas Søgaard has received", "This project has received" - ) - content = content.replace( - "### EU EMBLEM ###", - f""" -:::{{figure-md}} eu-emblem - -eu-emblem - -  -:::""" - ) - - # Write parsed results to output file - with open(output_file, "w") as f: - f.write(content) - - -if __name__ == "__main__": - - # Parse command-line arguments - parser = argparse.ArgumentParser( - description=""" -Parse paper.md file to About section for website. -""" - ) - - parser.add_argument("-i", "--input-file", required=True) - parser.add_argument("-o", "--output-file", required=True) - - args = parser.parse_args() - - main(args.input_file, args.output_file) diff --git a/docs/parse_contributing.py b/docs/parse_contributing.py deleted file mode 100644 index 577f3de76..000000000 --- a/docs/parse_contributing.py +++ /dev/null @@ -1,53 +0,0 @@ -"""Script to parse CONTRIBUTING.md file to contrib instructions for website.""" - -import argparse -import re - - -def main(input_file: str, output_file: str) -> None: - """Parsing script.""" - # Check(s) - assert input_file != output_file - assert input_file.endswith(".md") - assert output_file.endswith(".md") - - # Read input file contents - with open(input_file, "r") as f: - content = f.read() - - # Remove "Version control"-section everything after it. 
- pattern = "## Version control" - m = re.search(pattern, content) - content = content[: m.start()] - - # Trim for whitespaces and newlines - content = content.strip() - - # Rename title - content = "\n".join(["# Contribute"] + content.split("\n")[1:]) - - # Update relative links for absolute ones - content = content.replace( - "./", "../../" - ) - - # Write parsed results to output file - with open(output_file, "w") as f: - f.write(content) - - -if __name__ == "__main__": - - # Parse command-line arguments - parser = argparse.ArgumentParser( - description=""" -Parse CONTRIBUTING.md file to contrib instructions for website. -""" - ) - - parser.add_argument("-i", "--input-file", required=True) - parser.add_argument("-o", "--output-file", required=True) - - args = parser.parse_args() - - main(args.input_file, args.output_file) diff --git a/docs/parse_install.py b/docs/parse_install.py deleted file mode 100644 index 8dd428d62..000000000 --- a/docs/parse_install.py +++ /dev/null @@ -1,61 +0,0 @@ -"""Script to parse README.md file to installation instructions for website.""" - -import argparse -import re - - -def main(input_file: str, output_file: str) -> None: - """Parsing script.""" - # Check(s) - assert input_file != output_file - assert input_file.endswith(".md") - assert output_file.endswith(".md") - - # Read input file contents - with open(input_file, "r") as f: - content = f.read() - - # Remove anything before "Install" section - pattern = r"^##.* Install.*$\n" - m = re.search(pattern, content, re.M) - content = "\n".join(["# Install", content[m.end() :]]) - - # Remove everying after "Install" - pattern = r"##.*$" - m = next(re.finditer(pattern, content, re.M)) - content = content[: m.start()] - - # Convert relevant
-blocks to headers - pattern = ( - r"
\n" r"(.*?)<\/b><\/summary>\n" r"
" - ) - for m in re.finditer(pattern, content, re.M): - content = content.replace(m.group(0), "## " + m.group(1)) - - content = content.replace("
\n
", "") - - # Trim for whitespaces and newlines - content = content.strip() - for _ in range(2): - content = content.replace("\n\n\n", "\n\n") - - # Write parsed results to output file - with open(output_file, "w") as f: - f.write(content) - - -if __name__ == "__main__": - - # Parse command-line arguments - parser = argparse.ArgumentParser( - description=""" -Parse README.md file to installation instructions for website. -""" - ) - - parser.add_argument("-i", "--input-file", required=True) - parser.add_argument("-o", "--output-file", required=True) - - args = parser.parse_args() - - main(args.input_file, args.output_file) diff --git a/docs/source/about/about.rst b/docs/source/about/about.rst new file mode 100644 index 000000000..8ac10fdbe --- /dev/null +++ b/docs/source/about/about.rst @@ -0,0 +1,37 @@ +.. include:: ../substitutions.rst + +About +============ + +|graphnet|\ GraphNeT is an open-source Python framework aimed at providing high quality, user friendly, end-to-end functionality to perform reconstruction tasks at neutrino telescopes using deep learning. |graphnet|\ GraphNeT makes it fast and easy to train complex models that can provide event reconstruction with state-of-the-art performance, for arbitrary detector configurations, with inference times that are orders of magnitude faster than traditional reconstruction techniques. +|graphnet|\ GraphNeT provides a common, detector agnostic framework for ML developers and physicists that wish to use the state-of-the-art tools in their research. By uniting both user groups, |graphnet|\ GraphNeT aims to increase the longevity and usability of individual code contributions from ML developers by building a general, reusable software package based on software engineering best practices, and lowers the technical threshold for physicists that wish to use the most performant tools for their scientific problems. 
+ +Usage +----- + +|graphnet|\ GraphNeT comprises a number of modules providing the necessary tools to build workflows from ingesting raw training data in domain-specific formats to deploying trained models in domain-specific reconstruction chains, as illustrated in :numref:`flowchart`. + +.. _flowchart: +.. figure:: ../../../paper/flowchart.png + + High-level overview of a typical workflow using |graphnet|\ GraphNeT: :code:`graphnet.data` enables converting domain-specific data to industry-standard, intermediate file formats and reading this data; :code:`graphnet.models` allows for configuring and building complex models using simple, physics-oriented components; :code:`graphnet.training` manages model training and experiment logging; and finally, :code:`graphnet.deployment` allows for using trained models for inference in domain-specific reconstruction chains. + +:code:`graphnet.models` provides modular components subclassing :code:`torch.nn.Module`, meaning that users only need to import a few existing, purpose-built components and chain them together to form a complete model. ML developers can contribute to |graphnet|\ GraphNeT by extending this suite of model components — through new layer types, physics tasks, graph connectivities, etc. — and experiment with optimising these for different reconstruction tasks using experiment tracking. + +These models are trained using :code:`graphnet.training` on data prepared using :code:`graphnet.data`, to satisfy the high I/O loads required when training ML models on large batches of events, which domain-specific neutrino physics data formats typically do not allow. + +Trained models are deployed to a domain-specific reconstruction chain, yielding predictions, using the components in :code:`graphnet.deployment`. This can either be through model files or container images, making deployment as portable and dependency-free as possible. 
+ +By splitting up the model development as in :numref:`flowchart`, |graphnet|\ GraphNeT allows physics users to interface only with high-level building blocks or pre-trained models that can be used directly in their reconstruction chains, while allowing ML developers to continuously improve and expand the framework’s capabilities. + + +Acknowledgements +---------------- + + +.. image:: ../../../assets/images/eu-emblem.jpg + :width: 150 + +This project has received funding from the European Union’s Horizon 2020 research and innovation programme under the Marie Skłodowska-Curie grant agreement No. 890778. + +The work of Rasmus Ørsøe was partly performed in the framework of the PUNCH4NFDI consortium supported by DFG fund "NFDI 39/1", Germany. \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 22c1da88d..da4f76e08 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -46,6 +46,9 @@ "myst_parser", ] +# Figure references +numfig = True + # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] diff --git a/docs/source/contribute/contribute.rst b/docs/source/contribute/contribute.rst new file mode 100644 index 000000000..70ac9492f --- /dev/null +++ b/docs/source/contribute/contribute.rst @@ -0,0 +1,54 @@ +.. include:: ../substitutions.rst + +Contributing To GraphNeT\ |graphnet-header| +=========================================== +To make sure that the process of contributing is as smooth and effective as possible, we provide a few guidelines in this contributing guide that we encourage contributors to follow. + +GitHub issues +------------- + +Use `GitHub issues `_ for tracking and discussing requests and bugs. If there is anything you'd wish to contribute, the best place to start is to create a new issue and describe what you would like to work on. Alternatively, you can assign open issues to yourself, to indicate that you would like to take ownership of a particular task. 
Using issues actively in this way ensures transparency and agreement on priorities. This helps avoid situations with a lot of development effort going into a feature that e.g. turns out to be outside of scope for the project; or a specific solution to a problem that could have been better solved differently. + +Pull requests +------------- + +Develop code in a fork of the `main repo `_. Make contributions in dedicated development/feature branches on your forked repositories, e.g. if you are implementing a specific :code:`GraphDefinition` class you could create a branch named :code:`add-euclidean-graph-definition` on your own fork. + +Create pull requests from your development branch into :code:`graphnet-team/graphnet:main` to contribute to the project. **To be accepted,** pull requests must: + +* pass all automated checks, + +* be reviewed by at least one other contributor. These reviews should check for: + + #. standard python coding conventions, e.g. `PEP8 `_ + + #. docstring (Google-style) and type hinting as necessary + + #. unit tests as necessary + + #. clean coding practices, see e.g. `here `_. + +Conventions +----------- + +This repository aims to support the Python 3 versions that are actively supported (currently :code:`>=3.8`). Standard python coding conventions should be followed: + +* Adhere to `PEP8 `_ `black `_ +* Use `pylint `_ / `flake8 `_ and `black `_ to ensure as clean and well-formatted code as possible +* When relevant, adhere to `clean code practices `_ + +Code quality +------------ + +To ensure consistency in code style and adherence to select best practices, we **require** that all developers use :code:`black`, :code:`flake8`, :code:`mypy`, :code:`pydocstyle`, and :code:`docformatter` for automatically formatting and checking their code. This can conveniently be done using pre-commit hooks. To set this up, first make sure that you have installed the :code:`pre-commit` python package. 
It is included when installing |graphnet|\ GraphNeT with the :code:`develop` tag, i.e., :code:`pip install -e .[develop]`. Then, do + +.. code-block:: bash + + pre-commit install + + +Then, every time you commit a change, your code and docstrings will automatically be formatted using :code:`black` and :code:`docformatter`, while :code:`flake8`, :code:`mypy`, and :code:`pydocstyle` will check for errors and adherence to PEP8, PEP257, and static typing. See an illustration of the concept below: + +.. image:: ../../../assets/images/precommit_pipeline.png + +Image source: https://ljvmiranda921.github.io/notebook/2018/06/21/precommits-using-black-and-flake8/ \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index da9a39a0e..388886f5e 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -5,6 +5,9 @@ :maxdepth: 1 :hidden: - install - contribute - api/graphnet.rst \ No newline at end of file + + installation/install.rst + api/graphnet.rst + about/about.rst + contribute/contribute.rst + \ No newline at end of file diff --git a/docs/source/installation/install.rst b/docs/source/installation/install.rst new file mode 100644 index 000000000..ff08f2371 --- /dev/null +++ b/docs/source/installation/install.rst @@ -0,0 +1,46 @@ +.. include:: ../substitutions.rst + +Installation +============ + +|graphnet|\ GraphNeT is available for Python 3.8 to Python 3.11. + +.. note:: + We recommend installing |graphnet|\ GraphNeT in a separate environment, e.g. using a Python virtual environment or Anaconda (see details on installation `here `_). + +Quick Start +----------- + +.. raw:: html + :file: quick-start.html + + +When installation is completed, you should be able to run `the examples `_. 
+ +Installation in CVMFS (IceCube) +------------------------------- + +You may want |graphnet|\ GraphNeT to be able to interface with IceTray, e.g., when converting I3 files to a deep learning friendly file format, or when deploying models as part of an IceTray chain. In these cases, you need to install |graphnet|\ GraphNeT in a Python runtime that has IceTray installed. + +To achieve this, we recommend installing |graphnet|\ GraphNeT into a CVMFS with IceTray installed, like so: + +.. code-block:: bash + + # Download GraphNeT + git clone https://github.com/graphnet-team/graphnet.git + cd graphnet + # Open your favorite CVMFS distribution + eval `/cvmfs/icecube.opensciencegrid.org/py3-v4.2.1/setup.sh` + /cvmfs/icecube.opensciencegrid.org/py3-v4.2.1/RHEL_7_x86_64/metaprojects/icetray/v1.5.1/env-shell.sh + # Update central utils + pip install --upgrade "pip>=20" + pip install wheel setuptools==59.5.0 + # Install graphnet into the CVMFS as a user + pip install --user -r requirements/torch_cpu.txt -e .[torch,develop] + + +Once installed, |graphnet|\ GraphNeT is available whenever you open the CVMFS locally. + +.. note:: + We recommend installing |graphnet|\ GraphNeT without GPU in clean metaprojects. + diff --git a/docs/source/installation/quick-start.html b/docs/source/installation/quick-start.html new file mode 100644 index 000000000..4850e029a --- /dev/null +++ b/docs/source/installation/quick-start.html @@ -0,0 +1,139 @@ + + + 
+
+
PyTorch
+
Your OS
+
CUDA
+
Run:
+
+
+
+
+
+
+
+
+ + + diff --git a/docs/source/substitutions.rst b/docs/source/substitutions.rst new file mode 100644 index 000000000..60dc3602a --- /dev/null +++ b/docs/source/substitutions.rst @@ -0,0 +1,12 @@ +.. |graphnet| image:: ../../../assets/identity/favicon.svg + :width: 25px + :height: 25px + :alt: graphnet + :align: bottom + +.. |graphnet-header| image:: ../../../assets/identity/favicon.svg + :width: 50px + :height: 50px + :alt: graphnet + :align: bottom + diff --git a/requirements/torch_gpu.txt b/requirements/torch_cu118.txt similarity index 100% rename from requirements/torch_gpu.txt rename to requirements/torch_cu118.txt diff --git a/requirements/torch_cu121.txt b/requirements/torch_cu121.txt new file mode 100644 index 000000000..6577d22cc --- /dev/null +++ b/requirements/torch_cu121.txt @@ -0,0 +1,4 @@ +# Contains packages requirements for GPU installation +--find-links https://download.pytorch.org/whl/torch_stable.html +torch==2.2.0+cu121 +--find-links https://data.pyg.org/whl/torch-2.2.0+cu121.html