diff --git a/.codecov.yml b/.codecov.yml new file mode 100644 index 0000000..03d2268 --- /dev/null +++ b/.codecov.yml @@ -0,0 +1,14 @@ +# Codecov configuration to make it a bit less noisy +coverage: + status: + patch: false + project: + default: + threshold: 50% +comment: + layout: "header" + require_changes: false + branches: null + behavior: default + flags: null + paths: null diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 0000000..f19f0ca --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,42 @@ +# How to contribute + +We welcome contributions from external contributors, and this document +describes how to merge code changes into this flamel. + +## Getting Started + +* Make sure you have a [GitHub account](https://github.com/signup/free). +* [Fork](https://help.github.com/articles/fork-a-repo/) this repository on GitHub. +* On your local machine, + [clone](https://help.github.com/articles/cloning-a-repository/) your fork of + the repository. + +## Making Changes + +* Add some really awesome code to your local fork. It's usually a [good + idea](http://blog.jasonmeridth.com/posts/do-not-issue-pull-requests-from-your-master-branch/) + to make changes on a + [branch](https://help.github.com/articles/creating-and-deleting-branches-within-your-repository/) + with the branch name relating to the feature you are going to add. +* When you are ready for others to examine and comment on your new feature, + navigate to your fork of flamel on GitHub and open a [pull + request](https://help.github.com/articles/using-pull-requests/) (PR). Note that + after you launch a PR from one of your fork's branches, all + subsequent commits to that branch will be added to the open pull request + automatically. Each commit added to the PR will be validated for + mergability, compilation and test suite compliance; the results of these tests + will be visible on the PR page. 
+* If you're providing a new feature, you must add test cases and documentation. +* When the code is ready to go, make sure you run the test suite using pytest. +* When you're ready to be considered for merging, check the "Ready to go" + box on the PR page to let the flamel devs know that the changes are complete. + The code will not be merged until this box is checked, the continuous + integration returns checkmarks, + and multiple core developers give "Approved" reviews. + +# Additional Resources + +* [General GitHub documentation](https://help.github.com/) +* [PR best practices](http://codeinthehole.com/writing/pull-requests-and-other-good-practices-for-teams-using-github/) +* [A guide to contributing to software packages](http://www.contribution-guide.org) +* [Thinkful PR example](http://www.thinkful.com/learn/github-pull-request-tutorial/#Time-to-Submit-Your-First-PR) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..c772b96 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,12 @@ +## Description +Provide a brief description of the PR's purpose here. + +## Todos +Notable points that this PR has either accomplished or will accomplish. + - [ ] TODO 1 + +## Questions +- [ ] Question1 + +## Status +- [ ] Ready to go \ No newline at end of file diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml new file mode 100644 index 0000000..8d05e26 --- /dev/null +++ b/.github/workflows/CI.yaml @@ -0,0 +1,64 @@ +name: CI + +on: + # GitHub has started calling new repo's first branch "main" https://github.com/github/renaming + # The cookiecutter uses the "--initial-branch" flag when it runs git-init + push: + branches: + - "master" + pull_request: + branches: + - "master" + schedule: + # Weekly tests run on main by default: + # Scheduled workflows run on the latest commit on the default or base branch. 
+ # (from https://help.github.com/en/actions/reference/events-that-trigger-workflows#scheduled-events-schedule) + - cron: "0 0 * * 0" + +jobs: + test: + name: Test on ${{ matrix.os }}, Python ${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [macOS-latest, ubuntu-latest, windows-latest] + python-version: [3.8, 3.9, "3.10"] + + steps: + - uses: actions/checkout@v3 + + - name: Additional info about the build + shell: bash + run: | + uname -a + df -h + ulimit -a + + # More info on options: https://github.com/marketplace/actions/provision-with-micromamba + - uses: mamba-org/provision-with-micromamba@main + with: + environment-file: devtools/conda-envs/test_env.yaml + environment-name: test + channels: conda-forge,defaults + extra-specs: | + python=${{ matrix.python-version }} + + - name: Install package + # conda setup requires this special shell + shell: bash -l {0} + run: | + python -m pip install . --no-deps + micromamba list + + - name: Run tests + # conda setup requires this special shell + shell: bash -l {0} + run: | + pytest -v --cov=flamel --cov-report=xml --color=yes flamel/tests/ + + - name: CodeCov + uses: codecov/codecov-action@v1 + with: + file: ./coverage.xml + flags: unittests + name: codecov-${{ matrix.os }}-py${{ matrix.python-version }} diff --git a/.gitignore b/.gitignore index 3c224b3..e3353eb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,115 @@ -.idea* -*__pycache__* +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +.pytest_cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# profraw files from LLVM? Unclear exactly what triggers this +# There are reports this comes from LLVM profiling, but also Xcode 9. +*profraw + +# In-tree generated files +*/_version.py + +# OSX files +**/.DS_Store + +# Pycharm +.idea diff --git a/.lgtm.yml b/.lgtm.yml new file mode 100644 index 0000000..b0acc3d --- /dev/null +++ b/.lgtm.yml @@ -0,0 +1,12 @@ +# Configure LGTM for this package + +extraction: + python: # Configure Python + python_setup: # Configure the setup + version: 3 # Specify Version 3 +path_classifiers: + library: + - versioneer.py # Set Versioneer.py to an external "library" (3rd party code) + - devtools/* + generated: + - flamel/_version.py diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..1fcc86d --- /dev/null +++ b/AUTHORS @@ -0,0 +1,25 @@ +.. -*- coding: utf-8 -*- + +=================== + Authors of flamel +=================== + +flamel is a command line interface to alchemlyb. It was started by +Dominik Wille @harlor as a replacement for the alchemical-analysis.py +script. 
+ + +Chronological list of authors +----------------------------- + +2018 + - Dominik Wille (@harlor) + +2020 + - Henrik Jaeger (@hejamu) + - Alexander Schlaich (@schlaicha) + +2022 + - Zhiyi Wu (@xiki-tempula) + - Oliver Beckstein (@orbeckst) + \ No newline at end of file diff --git a/CHANGES b/CHANGES new file mode 100644 index 0000000..80fb3e8 --- /dev/null +++ b/CHANGES @@ -0,0 +1,62 @@ +# -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8 -*- +==================== + flamel CHANGELOG +===================== + +The rules for this file: + * entries are sorted newest-first. + * summarize sets of changes - don't reproduce every git log comment here. + * don't ever delete anything. + * keep the format consistent (79 char width, ISO 8601 date format + YYYY-MM-DD) and do not use tabs but use spaces for formatting + * accompany each entry with github issue/PR number (Issue #xyz) + * release numbers follow "Semantic Versioning" https://semver.org + +------------------------------------------------------------------------------ + +2022-xx-xx xiki-tempula, orbeckst + + * 0.3.0 + +Complete rewrite of the package, it is generally considered +incompatible with previous releases. See Discussion #12 +https://github.com/alchemistry/flamel/discussions/12 for background. + +Changes + +- Removed plugin-based system and changed into a thin CLI to access + alchemlyb.workflows (see Discussion #12). + +Enhancements + +- plotting output from ABFE workflow (issue #7) +- pip installable (issue #3) + + +2021-09-21 hejamu, schlaicha + + * 0.2.0 + +Enhancements + +- pickle output + + +Fixes + +- corrected unit handling +- reading of GROMACS input files + + +2018-11-26 harlor + + * 0.1.0 + +First release (actual release day: 2022-11-06, using @harlor's +original code dating back to 2018). 
+ +Features + +- plugin-based system +- currently only Gromacs parser and uncorrelation by dH/dl is supported +- reproduces TI, BAR, and MBAR results from alchemical-analysis.py \ No newline at end of file diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..53f5c6a --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,77 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, +body size, disability, ethnicity, gender identity and expression, level of +experience, nationality, personal appearance, race, religion, or sexual +identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. 
+ +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +Moreover, project maintainers will strive to offer feedback and advice to +ensure quality and consistency of contributions to the code. Contributions +from outside the group of project maintainers are strongly welcomed but the +final decision as to whether commits are merged into the codebase rests with +the team of project maintainers. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an +appointed representative at an online or offline event. Representation of a +project may be further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at 'WILLIAM@ZHIYIWU.ME'. The project team will +review and investigate all complaints, and will respond in a way that it deems +appropriate to the circumstances. The project team is obligated to maintain +confidentiality with regard to the reporter of an incident. Further details of +specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. 
+ +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 1.4, available at +[http://contributor-covenant.org/version/1/4][version] + +[homepage]: http://contributor-covenant.org +[version]: http://contributor-covenant.org/version/1/4/ diff --git a/LICENSE b/LICENSE index d079534..141cac2 100644 --- a/LICENSE +++ b/LICENSE @@ -1,29 +1,21 @@ -BSD 3-Clause License -Copyright (c) 2018, -All rights reserved. +Copyright 2022 alchemistry -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the +following conditions are met: -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following +disclaimer in the documentation and/or other materials provided with the distribution. -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. 
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..e0267af --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include CODE_OF_CONDUCT.md + +global-exclude *.py[cod] __pycache__ *.so diff --git a/README.md b/README.md index 67d6dcc..12b71be 100644 --- a/README.md +++ b/README.md @@ -1,126 +1,73 @@ # Flamel -The aim of the project is to develop a **command line interface (CLI) to [alchemlyb](https://github.com/alchemistry/alchemlyb)**, the well-tested and actively developed library for alchemical free energy calculations. -It is supposed to [become the successor](https://github.com/alchemistry/alchemlyb/wiki/Roadmap#librarify-alchemical-analysis-functionality) of the now unsupported [alchemical-analysis](https://github.com/MobleyLab/alchemical-analysis) script. +[//]: # (Badges) +[![GitHub Actions Build Status](https://github.com/alchemistry/flamel/workflows/CI/badge.svg)](https://github.com/alchemistry/flamel/actions?query=workflow%3ACI) +[![codecov](https://codecov.io/gh/alchemistry/flamel/branch/main/graph/badge.svg)](https://codecov.io/gh/alchemistry/flamel/branch/main) ----- -This project is currently *dormant* due to lack of developers. If you are **interested in contributing** please raise issues/open pull requests and ping [@orbeckst](https://github.com/orbeckst) and [@xiki-tempula](https://github.com/xiki-tempula) to get our attention. -We are happy to see new contributors! +The aim of the project is to develop a **command line interface (CLI) to +[alchemlyb](https://github.com/alchemistry/alchemlyb)**, the well-tested and +actively developed library for alchemical free energy calculations. It is +supposed to [become the successor](https://github.com/alchemistry/alchemlyb/wiki/Roadmap#librarify-alchemical-analysis-functionality) +of the now unsupported [alchemical-analysis](https://github.com/MobleyLab/alchemical-analysis) script. 
-Please read the [proposed future directions](https://github.com/alchemistry/alchemlyb/discussions/159#discussioncomment-1560486), which form the informal roadmap for developments. +## Installation ----- - - -# Installation -1. Download and install alchemlyb -```shell -pip install alchemlyb -``` -2. Download flamel +Clone flamel and install ```shell git clone git@github.com:alchemistry/flamel.git +cd flamel +pip install . ``` -# Usage -Currently only Gromacs parser and uncorrelation by dH/dl is supported! -``` -usage: flamel.py [-h] [-t TEMPERATURE] [-p PREFIX] [-q SUFFIX] [-e ESTIMATORS] - [-n UNCORR] [-r DECIMAL] [-o OUTPUT] [-a SOFTWARE] - [-s EQUILTIME] - -Collect data and estimate free energy differences - -optional arguments: - -h, --help show this help message and exit - -t TEMPERATURE, --temperature TEMPERATURE - Temperature in K. Default: 298 K. (default: 298.0) - -p PREFIX, --prefix PREFIX - Prefix for datafile sets, i.e.'dhdl' (default). - (default: dhdl) - -q SUFFIX, --suffix SUFFIX - Suffix for datafile sets, i.e. 'xvg' (default). - (default: xvg) - -e ESTIMATORS Comma separated Estimator methods (default: None) - -n UNCORR, --uncorr UNCORR - The observable to be used for the autocorrelation - analysis; either 'dhdl_all' (obtained as a sum over - all energy components) or 'dhdl' (obtained as a sum - over those energy components that are changing; - default) or 'dE'. In the latter case the energy - differences dE_{i,i+1} (dE_{i,i-1} for the last - lambda) are used. (default: dhdl) - -r DECIMAL, --decimal DECIMAL - The number of decimal places the free energies are to - be reported with. No worries, this is for the text - output only; the full-precision data will be stored in - 'results.pickle'. Default: 3. (default: 3) - -o OUTPUT, --output OUTPUT - Output methods (default: None) - -a SOFTWARE, --software SOFTWARE - Package's name the data files come from: Gromacs, - Sire, Desmond, or AMBER. Default: Gromacs. 
(default: - Gromacs) - -s EQUILTIME, --skiptime EQUILTIME - Discard data prior to this specified time as - 'equilibration' data. Units picoseconds. Default: 0 - ps. (default: 0) -``` +## Usage + +The analysis can be invoked with the following command -To read enumerated xvg files lambda_0.xvg, lambda_1.xvg, ... use: ```shell -flamel.py -p lambda_ +flamel -a GROMACS -d dhdl_data -f 10 -g -i 50 -j result.csv -m TI,BAR,MBAR -n dE -o out_data -p dhdl -q xvg -r 3 -s 50 -t 298 -v -w ``` -You should get a similar overview as [alchemical-analysis](https://github.com/MobleyLab/alchemical-analysis). +Run ``flamel -h`` to see the full description of the options. -# How it works -- Step 1: Read the necessary data -- Step 2: Uncorrelate the data -- Step 3: Estimate Free energy differences -- Step 4: Output +## Output -Each step is performed in Plugins which can easyly be be replaced by other plugins. +This script si a warpper around the +[ABFE](https://alchemlyb.readthedocs.io/en/latest/workflows/alchemlyb.workflows.ABFE.html#alchemlyb.workflows.ABFE) +workflow in [alchemlyb](https://github.com/alchemistry/alchemlyb). +The script will generate the output from ABFE workflow, including +[O_MBAR.pdf](https://alchemlyb.readthedocs.io/en/latest/visualisation.html#overlap-matrix-of-the-mbar), +[dF_t.pdf](https://alchemlyb.readthedocs.io/en/latest/visualisation.html#df-states-plots-between-different-estimators), +[dF_state.pdf](https://alchemlyb.readthedocs.io/en/latest/visualisation.html#overlap-matrix-of-the-mbar), +[dF_t.pdf](https://alchemlyb.readthedocs.io/en/latest/visualisation.html#forward-and-backward-convergence), +[dhdl_TI.pdf](https://alchemlyb.readthedocs.io/en/latest/visualisation.html#dhdl-plot-of-the-ti). 
-# Name -In the tradition to associate free energy estimations with alchemistry it's named after: [Nicolas Flamel](https://en.wikipedia.org/wiki/Nicolas_Flamel) +The script will also generate the `result.csv` and `result.p`, which is a +pandas DataFrame summarising the results. :: -# State of development: -Eventhoug alchemical-analysis is not fully covered by Flamel, it can already reproduce some results calculated using alchemical-analysis: + TI TI_Error BAR BAR_Error MBAR MBAR_Error + States 0 -- 1 0.962 0.007 0.956 0.007 0.964 0.006 + 1 -- 2 0.567 0.006 0.558 0.006 0.558 0.004 + 2 -- 3 0.264 0.005 0.258 0.005 0.254 0.004 + 3 -- 4 0.035 0.004 0.035 0.004 0.030 0.003 + Stages fep 1.828 0.014 1.806 0.016 1.807 0.014 + TOTAL 1.828 0.014 1.806 0.011 1.807 0.014 -In fact for TI, BAR, MBAR you should get exactly the same results: +## Name -Example Flamel output for the [water_particle/without_energy](https://github.com/alchemistry/alchemtest/tree/master/src/alchemtest/gmx/water_particle/without_energy) dataset: -``` ------------- --------------------- --------------------- --------------------- - States TI (kJ/mol) BAR (kJ/mol) MBAR (kJ/mol) ------------- --------------------- --------------------- --------------------- - 0 -- 1 0.074 +- 0.005 0.073 +- 0.005 0.071 +- 0.003 -... 
- 36 -- 37 -5.472 +- 0.038 -5.475 +- 0.038 -5.457 +- 0.028 ------------- --------------------- --------------------- --------------------- - coul: -41.067 +- 0.129 -41.022 +- nan -41.096 +- 0.170 - vdw: 11.912 +- 0.113 11.954 +- nan 12.022 +- 0.142 - TOTAL: -29.154 +- 0.172 -29.067 +- nan -29.074 +- 0.220 -``` +In the tradition to associate free energy estimations with alchemistry it's +named after [Nicolas Flamel](https://en.wikipedia.org/wiki/Nicolas_Flamel) -Alchemical Analysis with the same input files: -``` ------------- --------------------- --------------------- --------------------- - States TI (kJ/mol) BAR (kJ/mol) MBAR (kJ/mol) ------------- --------------------- --------------------- --------------------- - 0 -- 1 0.074 +- 0.005 0.073 +- 0.005 0.071 +- 0.003 -... - 36 -- 37 -5.472 +- 0.038 -5.475 +- 0.038 -5.457 +- 0.028 ------------- --------------------- --------------------- --------------------- - Coulomb: -41.067 +- 0.180 -41.022 +- 0.129 -41.096 +- 0.170 - vdWaals: 11.912 +- 0.160 11.954 +- 0.111 12.022 +- 0.139 - TOTAL: -29.154 +- 0.241 -29.067 +- 0.170 -29.074 +- 0.220 -``` +## Copyright + +Copyright (c) 2022, the [AUTHORS](./AUTHORS). + + +## Acknowledgements + +@harlor started *flamel* as a replacement for the original +`alchemical-analyis.py` script. + +Project template based on the [Computational Molecular Science Python +Cookiecutter](https://github.com/molssi/cookiecutter-cms) version 1.1. -# Planed features: -- **Output of statistical inefficiencies** -alchemical-analysis offers information about the statistical inefficiencies of the input datasets. -- **Uncorrelation threshold** -In alchemical-analysis it is possible to specify a threshold for the number of samples to keep in the uncorrelation process. 
diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml new file mode 100644 index 0000000..900ee55 --- /dev/null +++ b/devtools/conda-envs/test_env.yaml @@ -0,0 +1,17 @@ +name: test +channels: + - conda-forge +dependencies: + # Base depends + - python + - pip + + - "alchemlyb>=1.0" + + # Testing + - pytest + - pytest-cov + - pytest-mock + + - pip: + - alchemtest diff --git a/devtools/scripts/create_conda_env.py b/devtools/scripts/create_conda_env.py new file mode 100644 index 0000000..b51adc8 --- /dev/null +++ b/devtools/scripts/create_conda_env.py @@ -0,0 +1,95 @@ +import argparse +import os +import re +import glob +import shutil +import subprocess as sp +from tempfile import TemporaryDirectory +from contextlib import contextmanager +# YAML imports +try: + import yaml # PyYAML + loader = yaml.load +except ImportError: + try: + import ruamel_yaml as yaml # Ruamel YAML + except ImportError: + try: + # Load Ruamel YAML from the base conda environment + from importlib import util as import_util + CONDA_BIN = os.path.dirname(os.environ['CONDA_EXE']) + ruamel_yaml_path = glob.glob(os.path.join(CONDA_BIN, '..', + 'lib', 'python*.*', 'site-packages', + 'ruamel_yaml', '__init__.py'))[0] + # Based on importlib example, but only needs to load_module since its the whole package, not just + # a module + spec = import_util.spec_from_file_location('ruamel_yaml', ruamel_yaml_path) + yaml = spec.loader.load_module() + except (KeyError, ImportError, IndexError): + raise ImportError("No YAML parser could be found in this or the conda environment. " + "Could not find PyYAML or Ruamel YAML in the current environment, " + "AND could not find Ruamel YAML in the base conda environment through CONDA_EXE path. 
" + "Environment not created!") + loader = yaml.YAML(typ="safe").load # typ="safe" avoids odd typing on output + + +@contextmanager +def temp_cd(): + """Temporary CD Helper""" + cwd = os.getcwd() + with TemporaryDirectory() as td: + try: + os.chdir(td) + yield + finally: + os.chdir(cwd) + + +# Args +parser = argparse.ArgumentParser(description='Creates a conda environment from file for a given Python version.') +parser.add_argument('-n', '--name', type=str, + help='The name of the created Python environment') +parser.add_argument('-p', '--python', type=str, + help='The version of the created Python environment') +parser.add_argument('conda_file', + help='The file for the created Python environment') + +args = parser.parse_args() + +# Open the base file +with open(args.conda_file, "r") as handle: + yaml_script = loader(handle.read()) + +python_replacement_string = "python {}*".format(args.python) + +try: + for dep_index, dep_value in enumerate(yaml_script['dependencies']): + if re.match('python([ ><=*]+[0-9.*]*)?$', dep_value): # Match explicitly 'python' and its formats + yaml_script['dependencies'].pop(dep_index) + break # Making the assumption there is only one Python entry, also avoids need to enumerate in reverse +except (KeyError, TypeError): + # Case of no dependencies key, or dependencies: None + yaml_script['dependencies'] = [] +finally: + # Ensure the python version is added in. 
Even if the code does not need it, we assume the env does + yaml_script['dependencies'].insert(0, python_replacement_string) + +# Figure out conda path +if "CONDA_EXE" in os.environ: + conda_path = os.environ["CONDA_EXE"] +else: + conda_path = shutil.which("conda") +if conda_path is None: + raise RuntimeError("Could not find a conda binary in CONDA_EXE variable or in executable search path") + +print("CONDA ENV NAME {}".format(args.name)) +print("PYTHON VERSION {}".format(args.python)) +print("CONDA FILE NAME {}".format(args.conda_file)) +print("CONDA PATH {}".format(conda_path)) + +# Write to a temp directory which will always be cleaned up +with temp_cd(): + temp_file_name = "temp_script.yaml" + with open(temp_file_name, 'w') as f: + f.write(yaml.dump(yaml_script)) + sp.call("{} env create -n {} -f {}".format(conda_path, args.name, temp_file_name), shell=True) diff --git a/estimator/__init__.py b/estimator/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/estimator/bar.py b/estimator/bar.py deleted file mode 100644 index 16d70f4..0000000 --- a/estimator/bar.py +++ /dev/null @@ -1,43 +0,0 @@ -import alchemlyb.estimators -import alchemlyb.preprocessing - - -# Todo: Use an interface here... 
-class Bar: - name = 'BAR' - needs_dhdls = False - needs_u_nks = True - - dhdls = None - u_nks = None - - delta_f = None - d_delta_f = None - - def set_u_nks(self, u_nks): - """ - Setter method for u_nks - :param u_nks: - :return: - """ - self.u_nks = u_nks - - def estimate(self): - """ - Estimate free energy differences with BAR - :return: - """ - bar_est = alchemlyb.estimators.bar_.BAR() - bar_est.fit(self.u_nks) - - # Todo: Think about what data format we want to use here (current: DataFrame) - self.delta_f, self.d_delta_f = bar_est.delta_f_, bar_est.d_delta_f_ - - -def get_plugin(): - return Bar() - -#c29613d34ffafa133c3dc5a90a92ce3a84cbcd0c -#03649d469383a55c305c1daa55de7792c88a22d3 -#2d3a3ffc3dcf66f311c5c03a8a3214c0d0158554 -#d38701718853261c7667ca50fcbe16ec501310b2 \ No newline at end of file diff --git a/estimator/mbar.py b/estimator/mbar.py deleted file mode 100644 index 3f6b518..0000000 --- a/estimator/mbar.py +++ /dev/null @@ -1,38 +0,0 @@ -import alchemlyb.estimators -import alchemlyb.preprocessing - - -# Todo: Use an interface here... -class Mbar: - name = 'MBAR' - needs_dhdls = False - needs_u_nks = True - - dhdls = None - u_nks = None - - delta_f = None - d_delta_f = None - - def set_u_nks(self, u_nks): - """ - Setter method for u_nks - :param u_nks: - :return: - """ - self.u_nks = u_nks - - def estimate(self): - """ - Estimate free energy differences with MBAR - :return: - """ - mbar_est = alchemlyb.estimators.mbar_.MBAR() - mbar_est.fit(self.u_nks) - - # Todo: Think about what data format we want to use here (current: DataFrame) - self.delta_f, self.d_delta_f = mbar_est.delta_f_, mbar_est.d_delta_f_ - - -def get_plugin(): - return Mbar() diff --git a/estimator/ti.py b/estimator/ti.py deleted file mode 100644 index fc610be..0000000 --- a/estimator/ti.py +++ /dev/null @@ -1,37 +0,0 @@ -import alchemlyb.estimators - - -# Todo: Use an interface here... 
-class Ti: - name = 'TI' - needs_dhdls = True - needs_u_nks = False - - dhdls = None - u_nks = None - - delta_f = None - d_delta_f = None - - def set_dhdls(self, dhdls): - """ - Setter method for dH/dl values - :param dhdls: - :return: - """ - self.dhdls = dhdls - - def estimate(self): - """ - Estimate free energy differences using Trapezoid thermodynamic integration - :return: - """ - ti_est = alchemlyb.estimators.ti_.TI() - ti_est.fit(self.dhdls) - - # Todo: Think about what data format we want to use here (current: DataFrame) - self.delta_f, self.d_delta_f = ti_est.delta_f_, ti_est.d_delta_f_ - - -def get_plugin(): - return Ti() diff --git a/flamel.py b/flamel.py deleted file mode 100755 index c45fc3a..0000000 --- a/flamel.py +++ /dev/null @@ -1,153 +0,0 @@ -#!/usr/bin/env python - -import argparse - - -def get_available_plugin_ids(type): - """ - Get a list of available plugins of a cetrain type - :param type: str - Type of the plugin - :return: Series - List of available plugin names - """ - # Todo: Implement this - - if type == 'estimator': - return ['ti', 'bar', 'mbar'] - if type == 'uncorrelate': - return ['statistical_inefficiency_dhdl', 'statistical_inefficiency_dhdl_all'] - if type == 'output': - return ['simple', 'alchemical_analysis'] - if type == 'parser': - return ['gmx'] - - -def load_plugin_by_name(type, name, *args): - """ - Load a plugin by its name - :param type: str - Plugin type - :param name: str - Plugin name - :param args: - Args passed to the plugin - :return: - The plugin - """ - return load_plugins(type, [name], *args)[0] - - -def load_plugin(type, id, *args): - """ - Load a specific plugin - :param type: str - Type of the plugin - :param id: str - Name of the plugin - :param args: - Args passed to the plugin - :return: - The plugin - """ - # Todo: think about a suitable plugin system - mod = __import__("%s.%s" % (type, id), fromlist=['object']) - return mod.get_plugin(*args) - - -def argsplit(arg): - """ - Helper method to split a comma 
separated string to a list - :param arg: str - Input string - :return: Series - List of sections in the string - """ - return [] if arg is None else arg.split(',') - - -def load_plugins(type, selected, *args): - """ - Load multiple plugins - :param type: - Type of the plugins to load - :param selected: - List of selected plugins - if empty every available plugin will be used - :return: Series - The list of plugins - """ - available = get_available_plugin_ids(type) - plugins = [] - for plugin_id in available: - plugin = load_plugin(type, plugin_id, *args) - if selected: - if plugin.name in selected: - plugins.append(plugin) - else: - plugins.append(plugin) - - return plugins - - -def main(): - parser = argparse.ArgumentParser(description=""" - Collect data and estimate free energy differences - """, formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('-t', '--temperature', dest='temperature', help="Temperature in K. Default: 298 K.", default=298.0, type=float) - parser.add_argument('-p', '--prefix', dest='prefix', help='Prefix for datafile sets, i.e.\'dhdl\' (default).', default='dhdl') - parser.add_argument('-q', '--suffix', dest='suffix', help='Suffix for datafile sets, i.e. \'xvg\' (default).', default='xvg') - parser.add_argument('-e', dest='estimators', type=str, default=None, help="Comma separated Estimator methods") - parser.add_argument('-n', '--uncorr', dest='uncorr', help='The observable to be used for the autocorrelation analysis; either \'dhdl_all\' (obtained as a sum over all energy components) or \'dhdl\' (obtained as a sum over those energy components that are changing; default) or \'dE\'. In the latter case the energy differences dE_{i,i+1} (dE_{i,i-1} for the last lambda) are used.', default='dhdl') - parser.add_argument('-r', '--decimal', dest='decimal', help='The number of decimal places the free energies are to be reported with. 
No worries, this is for the text output only; the full-precision data will be stored in \'results.pickle\'. Default: 3.', default=3, type=int) - parser.add_argument('-o', '--output', dest='output', type=str, default=None, help="Output methods") - parser.add_argument('-a', '--software', dest='software', help='Package\'s name the data files come from: Gromacs, Sire, Desmond, or AMBER. Default: Gromacs.', default='Gromacs') - parser.add_argument('-s', '--skiptime', dest='equiltime', help='Discard data prior to this specified time as \'equilibration\' data. Units picoseconds. Default: 0 ps.', default=0, type=float) - args = parser.parse_args() - - parser = load_plugin_by_name('parser', args.software, args.temperature, args.prefix, args.suffix) - uncorrelator = load_plugin_by_name('uncorrelate', args.uncorr) - outputs = load_plugins('output', argsplit(args.output)) - estimators = load_plugins('estimator', argsplit(args.estimators)) - - # Step 0: Check what data the uncorrelator and the selected estimators need - do_dhdl = uncorrelator.needs_dhdls - do_u_nks = uncorrelator.needs_u_nks - for estimator in estimators: - if estimator.needs_dhdls: - do_dhdl = True - if estimator.needs_u_nks: - do_u_nks = True - - # Step 1: Read the necessary data - dhdls = None - u_nks = None - if do_dhdl: - dhdls = parser.get_dhdls() - if do_u_nks: - u_nks = parser.get_u_nks() - - # Step 2: Uncorrelate the data - if uncorrelator.needs_dhdls: - uncorrelator.set_dhdls(dhdls) - if uncorrelator.needs_u_nks: - uncorrelator.set_u_nks(u_nks) - - if do_dhdl: - dhdls = uncorrelator.uncorrelate(dhdls, args.equiltime) - if do_u_nks: - u_nks = uncorrelator.uncorrelate(u_nks, args.equiltime) - - # Step 3: Estimate Free energy differences - for estimator in estimators: - if estimator.needs_dhdls: - estimator.set_dhdls(dhdls) - if estimator.needs_u_nks: - estimator.set_u_nks(u_nks) - estimator.estimate() - - # Step 4: Output - for output in outputs: - output.output(estimators, args) - - -main() diff --git 
a/flamel/__init__.py b/flamel/__init__.py new file mode 100644 index 0000000..48d9853 --- /dev/null +++ b/flamel/__init__.py @@ -0,0 +1,7 @@ +"""A command line interface (CLI) to alchemlyb.""" + +# Add imports here +from .flamel import * + + +from ._version import __version__ diff --git a/flamel/flamel.py b/flamel/flamel.py new file mode 100644 index 0000000..b820c6d --- /dev/null +++ b/flamel/flamel.py @@ -0,0 +1,118 @@ +"""Provide the primary functions.""" + +import argparse +import logging +import pathlib +import pickle +import sys + +from alchemlyb.workflows import ABFE + +def main(): + parser = argparse.ArgumentParser(description=""" + Collect data and estimate free energy differences + """, formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('-a', '--software', dest='software', + help='Package\'s name the data files come from: ' + 'GROMACS or AMBER. Default: GROMACS.', + default='GROMACS') + parser.add_argument('-d', '--dir', dest='datafile_directory', + help='Directory in which data files are stored. ' + 'Default: Current directory.', default='./') + parser.add_argument('-f', '--forwrev', dest='forwrev', + help='Plot the free energy change as a function of ' + 'time in both directions, with the specified ' + 'number of points in the time plot. The number ' + 'of time points (an integer) must be provided. ' + 'Default: 0', default=0, type=int) + parser.add_argument('-g', '--breakdown', dest='breakdown', + help='Plot the free energy differences evaluated for ' + 'each pair of adjacent states for all methods, ' + 'including the dH/dlambda curve for TI.' + 'Default: True.', default=True, action='store_true') + parser.add_argument('-i', '--threshold', dest='uncorr_threshold', + help='Proceed with correlated samples if the number of ' + 'uncorrelated samples is found to be less than this' + ' number. If 0 is given, the time series analysis ' + 'will not be performed at all. 
Default: 50.', + default=50, type=int) + parser.add_argument('-j', '--resultfilename', dest='resultfilename', + help='custom defined result filename prefix. ' + 'Default: results.csv', + default='results.csv') + + parser.add_argument('-m', '--methods', dest='methods', + help='A comma separated list of the methods to estimate ' + 'the free energy with. Default: TI,BAR,MBAR.', + default='TI,BAR,MBAR') + parser.add_argument('-n', '--uncorr', dest='uncorr', + help='The observable to be used for the ' + 'autocorrelation analysis; either \'all\' ' + '(obtained as a sum over all energy components) ' + 'or \'dE\'. In the latter case the energy ' + 'differences dE_{i,i+1} (dE_{i,i-1} for the last ' + 'lambda) are used.', + default='dE') + parser.add_argument('-o', '--out', dest='output_directory', + help='Directory in which the output files produced by ' + 'this script will be stored. Default: Same as ' + 'datafile_directory.', + default='') + parser.add_argument('-p', '--prefix', dest='prefix', + help='Prefix for datafile sets, i.e.\'dhdl\' (default).', + default='dhdl') + parser.add_argument('-q', '--suffix', dest='suffix', + help='Suffix for datafile sets, i.e. \'xvg\' (default).', + default='xvg') + parser.add_argument('-r', '--decimal', dest='decimal', + help='The number of decimal places the free energies ' + 'are to be reported with. No worries, this is for ' + 'the text output only; the full-precision data ' + 'will be stored in \'results.pickle\'. Default: 3.', + default=3, type=int) + parser.add_argument('-s', '--skiptime', dest='equiltime', + help='Discard data prior to this specified time as ' + '\'equilibration\' data. Units picoseconds. ' + 'Default: 0 ps.', + default=0, type=float) + parser.add_argument('-t', '--temperature', dest='temperature', + help="Temperature in K. Default: 298 K.", + default=298.0, type=float) + parser.add_argument('-u', '--units', dest='units', + help='Units to report energies: \'kJ/mol\', \'kcal/mol\', and \'kT\'. 
Default: \'kcal/mol\'', + default='kcal/mol') + parser.add_argument('-v', '--verbose', dest='verbose', + help='Verbose option. Default: False.', default=False, + action='store_true') + parser.add_argument('-w', '--overlap', dest='overlap', + help='Print out and plot the overlap matrix. Default: True.', + default=True, action='store_true') + + args = parser.parse_args(sys.argv[1:]) + + # Print the logging to the console + if args.verbose: + logging.getLogger().addHandler(logging.StreamHandler()) + + if args.output_directory == '': + out = args.datafile_directory + else: + out = args.output_directory + if args.overlap: + overlap = 'O_MBAR.pdf' + else: + overlap = '' + + workflow = ABFE(T=args.temperature, units=args.units, software=args.software, + dir=args.datafile_directory, prefix=args.prefix, suffix=args.suffix, + outdirectory=out) + workflow.run(skiptime=args.equiltime, uncorr=args.uncorr, + threshold=args.uncorr_threshold, estimators=args.methods.split(','), + overlap=overlap, breakdown=args.breakdown, + forwrev=args.forwrev) + summary = workflow.summary + pickle.dump(summary, open(pathlib.Path(out) / 'result.p', 'wb')) + summary.round(args.decimal).to_csv(pathlib.Path(out) / args.resultfilename) + +if __name__ == "__main__": + main() diff --git a/flamel/py.typed b/flamel/py.typed new file mode 100644 index 0000000..6e3cdd2 --- /dev/null +++ b/flamel/py.typed @@ -0,0 +1 @@ +# PEP 561 marker file. See https://peps.python.org/pep-0561/ diff --git a/flamel/tests/__init__.py b/flamel/tests/__init__.py new file mode 100644 index 0000000..e131175 --- /dev/null +++ b/flamel/tests/__init__.py @@ -0,0 +1,3 @@ +""" +Empty init file in case you choose a package besides PyTest such as Nose which may look for such a file. +""" diff --git a/flamel/tests/test_flamel.py b/flamel/tests/test_flamel.py new file mode 100644 index 0000000..2e122e4 --- /dev/null +++ b/flamel/tests/test_flamel.py @@ -0,0 +1,49 @@ +""" +Unit and regression test for the flamel package. 
+""" + +# Import package, test suite, and other packages as needed +import sys +import pathlib +import pickle + +import pytest +from numpy.testing import assert_approx_equal + + +from flamel import main +from alchemtest.gmx import load_benzene + +class TestFlamel(): + @staticmethod + @pytest.fixture(scope='session') + def setup(tmp_path_factory, session_mocker): + out = tmp_path_factory.mktemp('out') + in_path = pathlib.Path(load_benzene().data['Coulomb'][0]).parents[1] + session_mocker.patch('sys.argv', + new=f"flamel -a GROMACS -d {in_path} -f 10 -g -i 50 -j result.csv -m TI,BAR,MBAR -n dE -o {out} -p dhdl -q xvg.bz2 -r 3 -s 50 -t 298 -v -w".split( + ' ')) + main() + return out + + def test_overlap(self, setup): + assert (setup / 'O_MBAR.pdf').exists() + + def test_dF_t(self, setup): + assert (setup / 'dF_t.pdf').exists() + + def test_dF_state(self, setup): + assert (setup / 'dF_state.pdf').exists() + + def test_result_csv(self, setup): + assert (setup / 'result.csv').exists() + + def test_result_p(self, setup): + df = pickle.load(open(setup / 'result.p', 'rb')) + assert_approx_equal(df['MBAR']['Stages']['TOTAL'], 1.8, significant=1) + + def test_dF_state_long(self, setup): + assert (setup / 'dF_state_long.pdf').exists() + + def test_dhdl_TI(self, setup): + assert (setup / 'dhdl_TI.pdf').exists() diff --git a/output/__init__.py b/output/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/output/alchemical_analysis.py b/output/alchemical_analysis.py deleted file mode 100644 index 4f445ff..0000000 --- a/output/alchemical_analysis.py +++ /dev/null @@ -1,206 +0,0 @@ -import numpy as np - - -class AlchemicalAnalysis: - name = 'alchemical-analysis' - k_b = 8.3144621E-3 - - @classmethod - def lenr(cls, text, l=21): - """ - Right aligned text in a string with length `l` - :param text: str - The text to align - :param l: int - desired length - :return: str - aligned text - """ - return ' '*(l - len(text)) + text + ' ' - - @classmethod - def lenc(cls, 
text, l=21): - """ - Center text in a string with length `l` - :param text: str - The text to center - :param l: int - desired length - :return: str - centered text - """ - lr = int((l - len(text)) / 2) - ll = l - len(text) - lr - return ' '*ll + text + ' '*lr + ' ' - - @classmethod - def ls(cls, estimators): - """ - Return a list of lambda values - :param estimators: Series - List of estimator plugins - :return: - The list of lambda values - """ - ls = [] - if estimators: - if estimators[0].needs_dhdls: - means = estimators[0].dhdls.mean(level=estimators[0].dhdls.index.names[1:]) - ls = np.array(means.reset_index()[means.index.names[:]]) - elif estimators[0].needs_u_nks: - means = estimators[0].u_nks.mean(level=estimators[0].u_nks.index.names[1:]) - ls = np.array(means.reset_index()[means.index.names[:]]) - - return ls - - @classmethod - def l_types(cls, estimators): - """ - Return a list of lambda types - :param estimators: Series - List of estimator plugins - :return: - The list of lambda types - """ - l_types = [] - if estimators: - if estimators[0].needs_dhdls: - l_types = estimators[0].dhdls.index.names[1:] - elif estimators[0].needs_u_nks: - l_types = estimators[0].u_nks.index.names[1:] - - return l_types - - @classmethod - def segments(cls, estimators): - """ - Collect and prepare values from different `estimators` into a series of values. 
- :param estimators: Series - List of estimator plugins - :return: - Segments of values to output - """ - segments = [] - l_types = cls.l_types(estimators) - ls = cls.ls(estimators) - if estimators: - segstart = 0 - ill = [0] * len(l_types) - nl = 0 - for i in range(len(ls)): - l = ls[i] - if (i < len(ls) - 1 and list(np.array(ls[i + 1], dtype=bool)).count(True) > nl) or i == len(ls) - 1: - if nl > 0: - inl = np.array(np.array(l, dtype=bool), dtype=int) - l_name = l_types[list(inl - ill).index(1)] - ill = inl - segments.append((segstart, i, l_name)) - - if i + 1 < len(ls): - nl = list(np.array(ls[i + 1], dtype=bool)).count(True) - segstart = i - return segments - - @classmethod - def prepare_value(cls, value, decimal): - """ - Convert `value` to a str with `decimal` precision. - :param value: float - Value to convert - :param decimal: - Precision - :return: str - str of `value` with `decimal` precision - """ - value_str = str(round(value, decimal)) - if np.isnan(value): - return str(value) + ' '*(decimal - 1) - return value_str + '0'*(decimal - len(value_str.split('.')[1])) - - def output(self, estimators, args): - """ - Print a alchemical-analysis like output. 
- :param estimators: Series - Series of estimators - :param args: argparse obj - arguments from argparse - :param ls: Series - Lambdas - :return: - """ - t = args.temperature - seglen = 2 * args.decimal + 15 - beta = 1.0 / t / self.k_b - out = '' - segments = self.segments(estimators) - - # First ---- - out += self.lenc('-'*12, 12) - for _ in estimators: - out += self.lenc('-'*seglen) - out += "\n" - - # Labels - out += self.lenc('States', 12) - for estimator in estimators: - out += self.lenr(estimator.name + ' (kJ/mol)' + ' '*args.decimal, seglen) - out += "\n" - - # Second ---- - out += self.lenc('-'*12, 12) - for _ in estimators: - out += self.lenc('-'*seglen) - out += "\n" - - # Free Energy differences for each lambda state - for i, l in enumerate(self.ls(estimators)[:-1]): - out += self.lenc(str(i) + ' -- ' + str(i+1), 12) - - for estimator in estimators: - df = estimator.delta_f - ddf = estimator.d_delta_f - out += self.lenr('%s +- %s' % ( - self.prepare_value(df.values[i, i+1] / beta, args.decimal), - self.prepare_value(ddf.values[i, i+1] / beta, args.decimal) - ), seglen) - out += "\n" - - # Third ---- - out += self.lenc('-'*12, 12) - for _ in estimators: - out += self.lenc('-'*seglen) - out += "\n" - - for segstart, segend, l_name in reversed(segments): - # Segment Energies - out += self.lenr('%s: ' % l_name[:-7], 12) - for estimator in estimators: - df = estimator.delta_f - ddf = estimator.d_delta_f - out += self.lenr('%s +- %s' % ( - self.prepare_value(df.values[segstart, segend] / beta, args.decimal), - self.prepare_value(ddf.values[segstart, segend] / beta, args.decimal) - ), seglen) - out += "\n" - - # TOTAL Energies - out += self.lenr('TOTAL: ', 12) - for estimator in estimators: - df = estimator.delta_f - ddf = estimator.d_delta_f - out += self.lenr('%s +- %s' % ( - self.prepare_value(df.values[0, -1] / beta, args.decimal), - self.prepare_value(ddf.values[0, -1] / beta, args.decimal) - ), seglen) - out += "\n" - - print(out) - - -def get_plugin(): - 
""" - Get Alchemical analysis output plugin - :return: - Alchemical analysis output plugin - """ - return AlchemicalAnalysis() diff --git a/output/pickle.py b/output/pickle.py new file mode 100644 index 0000000..0c307ed --- /dev/null +++ b/output/pickle.py @@ -0,0 +1,118 @@ +import numpy as np +import pickle +import time +import os + +import alchemlyb.postprocessors.units as units + +class Pickle: + name = 'pickle' + + @classmethod + def ls(cls, estimators): + """ + Return a list of lambda values + :param estimators: Series + List of estimator plugins + :return: + The list of lambda values + """ + ls = [] + if estimators: + if estimators[0].needs_dhdls: + means = estimators[0].dhdls.mean(level=estimators[0].dhdls.index.names[1:]) + ls = np.array(means.reset_index()[means.index.names[:]]) + elif estimators[0].needs_u_nks: + means = estimators[0].u_nks.mean(level=estimators[0].u_nks.index.names[1:]) + ls = np.array(means.reset_index()[means.index.names[:]]) + + return ls + + @classmethod + def l_types(cls, estimators): + """ + Return a list of lambda types + :param estimators: Series + List of estimator plugins + :return: + The list of lambda types + """ + l_types = [] + if estimators: + if estimators[0].needs_dhdls: + l_types = estimators[0].dhdls.index.names[1:] + elif estimators[0].needs_u_nks: + l_types = estimators[0].u_nks.index.names[1:] + + return l_types + + @classmethod + def segments(cls, estimators): + """ + Collect and prepare values from different `estimators` into a series of values. 
+ :param estimators: Series + List of estimator plugins + :return: + Segments of values to output + """ + segments = [] + l_types = cls.l_types(estimators) + ls = cls.ls(estimators) + if estimators: + segstart = 0 + ill = [0] * len(l_types) + nl = 0 + for i in range(len(ls)): + l = ls[i] + if (i < len(ls) - 1 and list(np.array(ls[i + 1], dtype=bool)).count(True) > nl) or i == len(ls) - 1: + if nl > 0: + inl = np.array(np.array(l, dtype=bool), dtype=int) + l_name = l_types[list(inl - ill).index(1)] + ill = inl + segments.append((segstart, i, l_name)) + + if i + 1 < len(ls): + nl = list(np.array(ls[i + 1], dtype=bool)).count(True) + segstart = i + return segments + + def output(self, estimators, args): + + P = args + + P.datafile_directory = os.getcwd() + P.when_analyzed = time.asctime() + P.dFs = {} + P.ddFs = {} + P.dF = {} + + segments = self.segments(estimators) + + for estimator in estimators: + + data = {} + + df = units.get_unit_converter(args.unit)(estimator.delta_f) + ddf = units.get_unit_converter(args.unit)(estimator.d_delta_f) + + for segstart, segend, l_name in reversed(segments): + data[l_name] = (df.values[segstart, segend], + ddf.values[segstart, segend]) + + data['total'] = (df.values[0, -1], ddf.values[0, -1]) + + P.dFs[estimator.name] = df + P.ddFs[estimator.name] = ddf + + P.dF[estimator.name] = data + + pickle.dump(P, open(args.resultfilename + '.pickle', 'wb')) + + +def get_plugin(): + """ + Get pickle output plugin + :return: + pickle output plugin + """ + return Pickle() diff --git a/output/simple.py b/output/simple.py deleted file mode 100644 index 0bcd608..0000000 --- a/output/simple.py +++ /dev/null @@ -1,37 +0,0 @@ -import alchemlyb.preprocessing -import pandas -import numpy as np - - -class Simple: - name = 'simple' - k_b = 8.3144621E-3 - - def output(self, estimators, args): - """ - Print a alchemical-analysis like output. 
- :param estimators: Series - Series of estimators - :param args: argparse obj - arguments from argparse - :param ls: Series - Lambdas - :return: - """ - t = args.temperature - for estimator in estimators: - df = estimator.delta_f - ddf = estimator.d_delta_f - beta = 1.0 / t / self.k_b - dfv = df.values[0, -1] / beta - ddfv = ddf.values[0, -1] / beta - print("%s: %f +- %f" % (estimator.name, dfv, ddfv)) - - -def get_plugin(): - """ - Get simple output plugin - :return: - simple output plugin - """ - return Simple() diff --git a/parser/__init__.py b/parser/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/parser/gmx.py b/parser/gmx.py deleted file mode 100644 index d1b884f..0000000 --- a/parser/gmx.py +++ /dev/null @@ -1,98 +0,0 @@ -import os -import alchemlyb.parsing.gmx -import alchemlyb.preprocessing - - -# Todo: Use an interface here... -class Gmx: - name = 'Gromacs' - dhdls = None - uks = None - T = 300.0 - prefix = '' - suffix = 'xvg' - - def __init__(self, T, prefix, suffix): - """ - :param T: float - Temperature in K - :param prefix: str - File prefix - :param suffix: str - File suffix - """ - self.T = T - self.prefix = prefix - self.suffix = suffix - - def get_files(self): - """ - Get a list of files with the given prefix and suffix - :return: Series - The list of files - """ - ls = os.listdir() - - # Build a list of tuples with name and a number - files = [] - for f in ls: - if f[0:len(self.prefix)] == self.prefix and f[-len(self.suffix):] == self.suffix: - name = f[len(self.prefix):-len(self.suffix)] - num = int(''.join([c for c in name if c.isdigit()])) - files.append((f, num)) - - # Sort the list by of tuples by the number - sorted_files = sorted(files, key=lambda tup: tup[1]) - - # Return only the file names - return [f[0] for f in sorted_files] - - def get_dhdls(self): - """ - Read dH/dl values from files - :return: Series - List of dH/dl data frames - """ - files = self.get_files() - dhdls_ = [] - - for fname in files: - 
print('Read dH/dl from %s' % fname) - dhdl_ = alchemlyb.parsing.gmx.extract_dHdl(fname, self.T) - dhdls_.append(dhdl_) - - return dhdls_ - - def get_u_nks(self): - """ - Read u_nk values from files - :return: - List of u_nk data frames - """ - files = self.get_files() - uks_ = [] - - for fname in files: - print('Read U_nk from %s' % fname) - - uk = alchemlyb.parsing.gmx.extract_u_nk(fname, self.T) - - uk_ = uk.copy() - uks_.append(uk_) - - return uks_ - - -def get_plugin(*args): - """ - Get the GMX parser plugin - :param T: float - Temperature in K - :param prefix: str - File prefix - :param suffix: str - File suffix - :return: Parser - The GMX parser - """ - return Gmx(*args) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..d74b7cb --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,89 @@ +[build-system] +requires = ["setuptools>=61.0", "versioningit~=2.0"] +build-backend = "setuptools.build_meta" + +# Self-descriptive entries which should always be present +# https://packaging.python.org/en/latest/specifications/declaring-project-metadata/ +[project] +name = "flamel" +description = "A command line interface (CLI) to alchemlyb." +dynamic = ["version"] +readme = "README.md" +authors = [ + { name = "alchemistry", email = "william@zhiyiwu.me" } +] +license = { text = "BSD-3-Clause" } +# See https://pypi.org/classifiers/ +classifiers = [ + "License :: OSI Approved :: BSD License", + "Programming Language :: Python :: 3", +] +requires-python = ">=3.8" +# Declare any run-time dependencies that should be installed with the package. +dependencies = [ + "importlib-resources;python_version<'3.10'", + "alchemlyb>=1.0" +] + +# Update the urls once the hosting is set up. 
+#[project.urls] +#"Source" = "https://github.com//flamel/" +#"Documentation" = "https://flamel.readthedocs.io/" + +[project.scripts] +flamel = "flamel.flamel:main" + +[project.optional-dependencies] +test = [ + "pytest>=6.1.2", + "pytest-runner" +] + +[tool.setuptools] +# This subkey is a beta stage development and keys may change in the future, see https://setuptools.pypa.io/en/latest/userguide/pyproject_config.html for more details +# +# As of version 0.971, mypy does not support type checking of installed zipped +# packages (because it does not actually import the Python packages). +# We declare the package not-zip-safe so that our type hints are also available +# when checking client code that uses our (installed) package. +# Ref: +# https://mypy.readthedocs.io/en/stable/installed_packages.html?highlight=zip#using-installed-packages-with-mypy-pep-561 +zip-safe = false +# Let setuptools discover the package in the current directory, +# but be explicit about non-Python files. +# See also: +# https://setuptools.pypa.io/en/latest/userguide/pyproject_config.html#setuptools-specific-configuration +# Note that behavior is currently evolving with respect to how to interpret the +# "data" and "tests" subdirectories. As of setuptools 63, both are automatically +# included if namespaces is true (default), even if the package is named explicitly +# (instead of using 'find'). With 'find', the 'tests' subpackage is discovered +# recursively because of its __init__.py file, but the data subdirectory is excluded +# with include-package-data = false and namespaces = false. 
+include-package-data = false +[tool.setuptools.packages.find] +namespaces = false +where = ["."] + +# Ref https://setuptools.pypa.io/en/latest/userguide/datafiles.html#package-data +[tool.setuptools.package-data] +flamel = [ + "py.typed" +] + +[tool.versioningit] +default-version = "1+unknown" + +[tool.versioningit.format] +distance = "{base_version}+{distance}.{vcs}{rev}" +dirty = "{base_version}+{distance}.{vcs}{rev}.dirty" +distance-dirty = "{base_version}+{distance}.{vcs}{rev}.dirty" + +[tool.versioningit.vcs] +# The method key: +method = "git" # <- The method name +# Parameters to pass to the method: +match = ["*"] +default-tag = "1.0.0" + +[tool.versioningit.write] +file = "flamel/_version.py" diff --git a/uncorrelate/__init__.py b/uncorrelate/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/uncorrelate/statistical_inefficiency_de.py b/uncorrelate/statistical_inefficiency_de.py deleted file mode 100644 index b882403..0000000 --- a/uncorrelate/statistical_inefficiency_de.py +++ /dev/null @@ -1,40 +0,0 @@ -import alchemlyb.preprocessing -import pandas - - -# Todo: Test this, correct this, documentation -class StatisticalInefficiencyUnks: - name = 'dE' - needs_dhdls = False - needs_u_nks = True - - uks = None - - def set_u_nks(self, u_nks): - self.uks = u_nks - - def uncorrelate(self, dfs, lower): - l_values_ = [] - - for uk_ in self.uks: - l_values_.append(list(uk_.xs(0, level=0).index.values[0])) - - i = 0 - uncorrelated_dfs = [] - statinefs = [] - for uk, df in zip(self.uks, dfs): - if i + 1 < len(self.uks): - s = uk.iloc[:, 0:i + 2] - else: - s = uk.iloc[:, 0:i] - - uncorrelated_df, statinef = alchemlyb.preprocessing.statistical_inefficiency(df, s, lower, conservative=False) - uncorrelated_dfs.append(uncorrelated_df) - statinefs.append(statinef) - i += 1 - - return pandas.concat(uncorrelated_dfs) - - -def get_plugin(*args): - return StatisticalInefficiencyUnks() diff --git a/uncorrelate/statistical_inefficiency_dhdl.py 
b/uncorrelate/statistical_inefficiency_dhdl.py deleted file mode 100644 index 2a29dcc..0000000 --- a/uncorrelate/statistical_inefficiency_dhdl.py +++ /dev/null @@ -1,68 +0,0 @@ -import alchemlyb.preprocessing -import pandas -import numpy as np - - -# Todo: Use interface here -class StatisticalInefficiencyDhdl: - name = 'dhdl' - - needs_dhdls = True - needs_u_nks = False - - dhdl = None - - def set_dhdls(self, dhdls): - """ - :param dhdls: Series - List of dH/dl values - :return: - """ - self.dhdls = dhdls - - def uncorrelate(self, dfs, lower): - """ - :param dfs: Series - List of data to uncorrelate - :return: Dataframe - uncorrelated Dataframe of `dfs` - """ - l_values_ = [] - - for dhdl_ in self.dhdls: - if len(dhdl_.columns) == 1: - l_values_.append(list([dhdl_.xs(0, level=0).index.values[0]])) - else: - l_values_.append(list(dhdl_.xs(0, level=0).index.values[0])) - - dl = [] - for i, l in enumerate(l_values_): - dli = [] - for j, lij in enumerate(l): - dlij = False - if i < len(l_values_) - 1: - if l_values_[i+1][j] != lij: - dlij = True - if i > 0: - if l_values_[i - 1][j] != lij: - dlij = True - dli.append(dlij) - dl.append(dli) - - uncorrelated_dfs = [] - for dhdl_, l, df in zip(self.dhdls, dl, dfs): - ind = np.array(l, dtype=bool) - ind = np.array(ind, dtype=int) - dhdl_sum = dhdl_.dot(ind) - uncorrelated_dfs.append(alchemlyb.preprocessing.statistical_inefficiency(df, dhdl_sum, lower, conservative=False)) - - return pandas.concat(uncorrelated_dfs) - - -def get_plugin(*args): - """ - :param args: - :return: - Statitical inefficiency uncorrelator - """ - return StatisticalInefficiencyDhdl() diff --git a/uncorrelate/statistical_inefficiency_dhdl_all.py b/uncorrelate/statistical_inefficiency_dhdl_all.py deleted file mode 100644 index fff38a5..0000000 --- a/uncorrelate/statistical_inefficiency_dhdl_all.py +++ /dev/null @@ -1,45 +0,0 @@ -import alchemlyb.preprocessing -import pandas -import numpy as np - - -# Todo: Use interface here -class 
StatisticalInefficiencyDhdlAll: - name = 'dhdl_all' - - needs_dhdls = True - needs_u_nks = False - - dhdl = None - - def set_dhdls(self, dhdls): - """ - :param dhdls: Series - List of dH/dl values - :return: - """ - self.dhdls = dhdls - - def uncorrelate(self, dfs, lower): - """ - :param dfs: Series - List of data to uncorrelate - :return: Dataframe - uncorrelated Dataframe of `dfs` - """ - - uncorrelated_dfs = [] - for dhdl_, df in zip(self.dhdls, dfs): - dhdl_sum = dhdl_.sum(axis=1) - uncorrelated_dfs.append(alchemlyb.preprocessing.statistical_inefficiency(df, dhdl_sum, lower, conservative=False)) - - return pandas.concat(uncorrelated_dfs) - - -def get_plugin(*args): - """ - :param args: - :return: - Statitical inefficiency uncorrelator using a sum of all dhdls - """ - return StatisticalInefficiencyDhdlAll()