Fix/gh actions and docs improvements (#30)
* docs changes

* updates to pypi workflow

* formatting docs and badges

* py3.8 for docs

* updates to docs formatting and theme

* removed old theme ref in conf.py

* updated gh actions workflows

* added pre-commit hooks, updated makefile

* updated gitignore

* refactored test path

* updated rst files and sphinx requirement

* formatting and import sorting

* refactored tests for datasets

* docstrings fixes

* bump up version
omsh authored Feb 5, 2024
1 parent 5ef83fc commit 510e522
Showing 55 changed files with 946 additions and 466 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/build.yaml
@@ -5,14 +5,15 @@ on:
   push:
     branches:
       - main
+      - develop

 jobs:
   build:

     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.7"]
+        python-version: ["3.8"]

     steps:
     - uses: actions/checkout@v2
14 changes: 10 additions & 4 deletions .github/workflows/pypi.yaml
@@ -1,35 +1,41 @@
 name: PyPI

 on:
   workflow_dispatch:
+  workflow_run:
+    workflows: ["Build"]
+    types:
+      - completed
   push:
     branches:
       - main

 jobs:
-  deploy:
+  release:
     runs-on: ubuntu-latest
+    if: ${{ github.event.workflow_run.conclusion == 'success' }}
     steps:
     - uses: actions/checkout@v2
     - name: Set up Python
       uses: actions/setup-python@v2
       with:
-        python-version: '3.7'
+        python-version: '3.8'
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
         pip install build
     - name: Build package
       run: python -m build
-    - name: Publish package
+    - name: publish_pypi
       uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
       with:
         user: __token__
         password: ${{ secrets.SECRET_PYPI }}
     - name: Create Github release
+      if: steps.publish_pypi.outcome == 'success'
       env:
         GITHUB_TOKEN: ${{ secrets.CLI_GH_TOKEN }}
       run: |
-        VERSION=$(python -c "from dlomix import __version__; print(__version__);")
+        VERSION=$(python setup.py --version)
         gh release create v$VERSION --title $VERSION --generate-notes
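
Note on the release step: the tag version is now read with `python setup.py --version` instead of importing the package. A hedged sketch of the difference (it assumes a `setup.py` at the repository root, as the workflow above uses):

```python
# Sketch: two ways a release script can resolve the package version.
# Importing dlomix pulls in its runtime dependencies (e.g. TensorFlow),
# which may not be installed in a minimal CI job; asking setuptools
# for the version avoids importing the package entirely.
import subprocess

# Old approach (what the workflow used before this commit):
# subprocess.run(["python", "-c", "from dlomix import __version__; print(__version__)"])

# New approach: setuptools reports the version without importing dlomix.
version = subprocess.check_output(
    ["python", "setup.py", "--version"], text=True
).strip()
print(f"v{version}")  # tag passed to `gh release create`, e.g. v0.0.7
```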
8 changes: 8 additions & 0 deletions .gitignore
@@ -54,6 +54,9 @@ coverage.xml
 .hypothesis/
 .pytest_cache/

+# put all coverage in one place
+cov/
+
 # Translations
 *.mo
 *.pot
@@ -150,13 +153,18 @@ notebooks/wandb

 # local to do file if exists :)
 todo.txt
+todo.md

 .DS_Store

 # model checkpoints in the run scripts directory
 run_scripts/checkpoint*
 run_scripts/*.index
 run_scripts/*.data-*
 run_scripts/*.csv

+# testing metadata
+metadata.parquet

+# test assets (will be downloaded the first time tests are run and then ignored by git)
+assets/
16 changes: 16 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,16 @@
+repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v2.3.0
+    hooks:
+    -   id: end-of-file-fixer
+    -   id: trailing-whitespace
+-   repo: https://github.com/pycqa/isort
+    rev: 5.12.0
+    hooks:
+    -   id: isort
+        name: isort (python)
+        args: ["--profile", "black"]
+-   repo: https://github.com/psf/black
+    rev: 23.1.0
+    hooks:
+    -   id: black
2 changes: 1 addition & 1 deletion .readthedocs.yaml
@@ -9,7 +9,7 @@ version: 2
 build:
   os: ubuntu-20.04
   tools:
-    python: "3.7"
+    python: "3.8"

 # Build documentation in the docs/ directory with Sphinx
 sphinx:
29 changes: 28 additions & 1 deletion Makefile
@@ -1,17 +1,44 @@
 install:
 	pip install --upgrade pip && pip install -e .

+uninstall:
+	pip uninstall dlomix -y
+
+install-nodeps:
+	pip install --upgrade pip && pip install -e . --no-deps
+
+
 install-dev:
 	pip install --upgrade pip && pip install -e .[dev]

 .PHONY: test
 test:
-	python -m pytest ./test/*.py --doctest-modules --junitxml=junit/test-results.xml --cov=dlomix --cov-report=xml --cov-report=html
+	make uninstall
+	make install
+	mkdir -p cov/
+
+	python -m pytest tests/ --junitxml=junit/test-results.xml --cov=dlomix --cov-report html:cov/cov_html --cov-report xml:cov/cov.xml --cov-report lcov:cov/cov.info --cov-report annotate:cov/cov_annotate
+
+test-local:
+	make uninstall
+	make install-nodeps
+	mkdir -p cov/
+
+	python -m pytest tests/ --junitxml=junit/test-results.xml --cov=dlomix --cov-report html:cov/cov_html --cov-report xml:cov/cov.xml --cov-report lcov:cov/cov.info --cov-report annotate:cov/cov_annotate
+
+
 format:
 	black ./dlomix/*
+	isort --profile black .
+	black ./dlomix/*.py
+	black ./run_scripts/*.py
+	black ./tests/*.py

 lint:
 	pylint --disable=R,C ./dlomix/*

+build-docs:
+	cd docs && make clean html
+	cd docs/_build/html/ && open index.html
+
 all: install format test
4 changes: 2 additions & 2 deletions dlomix/__init__.py
@@ -1,10 +1,10 @@
-__version__ = "0.0.6"
+__version__ = "0.0.7"

 META_DATA = {
     "author": "Omar Shouman",
     "author_email": "[email protected]",
     "description": "Deep Learning for Proteomics",
     "package_name": "DLOmix",
-    "copyright_text": "2023, Wilhelm Lab, TU Munich.",
+    "copyright_text": "2024, Wilhelm Lab, TU Munich.",
     "github_url": "https://github.com/wilhelm-lab/dlomix",
 }
29 changes: 15 additions & 14 deletions dlomix/data/RetentionTimeDataset.py
@@ -1,7 +1,9 @@
 import json
-import pandas as pd
+
 import numpy as np
+import pandas as pd
 import tensorflow as tf
+
 from dlomix.constants import DEFAULT_PARQUET_ENGINE

 """
@@ -22,29 +24,29 @@ class RetentionTimeDataset:
     data_source : str, tuple of two numpy.ndarray, numpy.ndarray, optional
         source can be a tuple of two arrays (sequences, targets), single array (sequences), useful for test data, or a str with a file path to a csv file. Defaults to None.
     sep : str, optional
-        separator to be used if the data source is a CSV file. Defaults to ",".
+        separator to be used if the data source is a CSV file. Defaults to ``,``.
     sequence_col : str, optional
-        name of the column containing the sequences in the provided CSV. Defaults to "sequence".
+        name of the column containing the sequences in the provided CSV. Defaults to ``sequence``.
     target_col : str, optional
-        name of the column containing the targets (indexed retention time). Defaults to "irt".
+        name of the column containing the targets (indexed retention time). Defaults to ``irt``.
     feature_cols : list, optional
-        a list of columns containing other features that can be used later as inputs to a model. Defaults to None.
+        a list of columns containing other features that can be used later as inputs to a model. Defaults to ``None``.
     normalize_targets : bool, optional
-        a boolean whether to normalize the targets or not (subtract mean and divide by standard deviation). Defaults to False.
+        a boolean whether to normalize the targets or not (subtract mean and divide by standard deviation). Defaults to ``False``.
     seq_length : int, optional
-        the sequence length to be used, where all sequences will be padded to this length, longer sequences will be removed and not truncated. Defaults to 0.
+        the sequence length to be used, where all sequences will be padded to this length, longer sequences will be removed and not truncated. Defaults to ``0``.
     batch_size : int, optional
-        the batch size to be used for consuming the dataset in training a model. Defaults to 32.
+        the batch size to be used for consuming the dataset in training a model. Defaults to ``32``.
     val_ratio : int, optional
-        a fraction to determine the size of the validation data (0.2 = 20%). Defaults to 0.
+        a fraction to determine the size of the validation data (``0.2 = 20%``). Defaults to ``0``.
     seed : int, optional
-        a seed to use for splitting the data to allow for a reproducible split. Defaults to 21.
+        a seed to use for splitting the data to allow for a reproducible split. Defaults to ``21``.
     test : bool, optional
-        a boolean whether the dataset is a test dataset or not. Defaults to False.
+        a boolean whether the dataset is a test dataset or not. Defaults to ``False``.
     path_aminoacid_atomcounts : str, optional
-        a string with a path to a CSV table with the atom counts of the different amino acids (can be used for feature extraction). Defaults to None.
+        a string with a path to a CSV table with the atom counts of the different amino acids (can be used for feature extraction). Defaults to ``None``.
     sample_run : bool, optional
-        a boolean to limit the number of examples to a small number, SAMPLE_RUN_N, for testing and debugging purposes. Defaults to False.
+        a boolean to limit the number of examples to a small number, SAMPLE_RUN_N, for testing and debugging purposes. Defaults to ``False``.
     """
     ATOM_TABLE = None
     SPLIT_NAMES = ["train", "val", "test"]
@@ -376,7 +378,6 @@ def _pad_sequences(self, seq, target):
         return seq, target

     def _normalize_target(self, seq, target):
-
         target = tf.math.divide(
             tf.math.subtract(target, self._data_mean), self._data_std
         )
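
Taken together, the documented parameters make a typical construction look like this hedged sketch (the CSV path is a placeholder; the column names are the documented defaults):

```python
# Hypothetical usage sketch of RetentionTimeDataset; "rt_data.csv"
# is a placeholder file using the documented default column names.
from dlomix.data import RetentionTimeDataset

rtdata = RetentionTimeDataset(
    data_source="rt_data.csv",  # CSV with "sequence" and "irt" columns
    sequence_col="sequence",
    target_col="irt",
    seq_length=30,           # pad to 30; longer sequences are dropped, not truncated
    batch_size=32,
    val_ratio=0.2,           # 20% of examples go to the validation split
    normalize_targets=True,  # subtract mean and divide by standard deviation
    seed=21,
)
```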
2 changes: 1 addition & 1 deletion dlomix/data/__init__.py
@@ -1,4 +1,4 @@
-from .RetentionTimeDataset import *
 from .IntensityDataset import *
+from .RetentionTimeDataset import *

 __all__ = ["RetentionTimeDataset", "IntensityDataset"]
26 changes: 16 additions & 10 deletions dlomix/eval/rt_eval.py
@@ -3,17 +3,24 @@


 class TimeDeltaMetric(tf.keras.metrics.Metric):
-    """Implementation of the time delta metric as a Keras Metric.
+    r"""Implementation of the time delta metric as a Keras Metric.

     Parameters
     ----------
-    mean (int, optional): Mean value of the targets in case normalization was performed. Defaults to 0.
-    std (int, optional): Standard deviation value of the targets in case normalization was performed. Defaults to 1.
-    percentage (float, optional): What percentage of the data points to consider, this is specific to the conmputation of the metric. Defaults to 0.95 which corresponds to 95% of the datapoints and is the mostly used value in papers.
-    name (str, optional): Name of the metric so that it can be reported and used later in Keras History objects. Defaults to 'timedelta'.
-    rescale_targets (bool, optional): Whether to rescale (denormalize) targets or not. Defaults to False.
-    rescale_predictions (bool, optional): Whether to rescale (denormalize) predictions or not. Defaults to False.
-    double_delta (bool, optional): Whether to multiple the computed delta by 2 in order to make it two-sided or not. Defaults to False.
+    mean : int, optional
+        Mean value of the targets in case normalization was performed. Defaults to 0.
+    std : int, optional
+        Standard deviation value of the targets in case normalization was performed. Defaults to 1.
+    percentage : float, optional
+        What percentage of the data points to consider; this is specific to the computation of the metric. Defaults to 0.95, which corresponds to 95% of the data points and is the most commonly used value in papers.
+    name : str, optional
+        Name of the metric so that it can be reported and used later in Keras History objects. Defaults to 'timedelta'.
+    rescale_targets : bool, optional
+        Whether to rescale (denormalize) targets or not. Defaults to False.
+    rescale_predictions : bool, optional
+        Whether to rescale (denormalize) predictions or not. Defaults to False.
+    double_delta : bool, optional
+        Whether to multiply the computed delta by 2 in order to make it two-sided or not. Defaults to False.
     """

     def __init__(
@@ -27,7 +34,6 @@ def __init__(
         double_delta=False,
         **kwargs
     ):
-
         super(TimeDeltaMetric, self).__init__(name=name, **kwargs)
         self.delta = self.add_weight(name="delta", initializer="zeros")
         self.batch_count = self.add_weight(name="batch-count", initializer="zeros")
@@ -66,7 +72,7 @@ def update_state(self, y_true, y_pred, sample_weight=None):
         self.delta.assign_add(tf.math.reduce_sum(d))

     def result(self):
-        # this is simple averaging over the batches, more complex reduction can be added based on domain expertises
+        # this is simple averaging over the batches, more complex reduction can be added based on domain expertise
         # Examples are: take max or min of both deltas (translates to a strict or a relaxed metric)
         return tf.math.divide(self.delta, self.batch_count)
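
Because TimeDeltaMetric is a regular Keras metric, it plugs directly into `model.compile`; a minimal sketch (the model and the normalization statistics below are placeholders):

```python
# Hedged usage sketch for TimeDeltaMetric; the Dense model is a stand-in.
import tensorflow as tf
from dlomix.eval.rt_eval import TimeDeltaMetric

model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
model.compile(
    optimizer="adam",
    loss="mse",
    # mean/std should be the statistics used when the targets were normalized
    metrics=[TimeDeltaMetric(mean=0.0, std=1.0, percentage=0.95)],
)
```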
3 changes: 1 addition & 2 deletions dlomix/layers/attention.py
@@ -1,6 +1,6 @@
 import tensorflow as tf
 import tensorflow.keras.backend as K
-from tensorflow.keras import regularizers, constraints, initializers, activations
+from tensorflow.keras import activations, constraints, initializers, regularizers


 class DecoderAttentionLayer(tf.keras.layers.Layer):
@@ -34,7 +34,6 @@ def __init__(
         bias=True,
         **kwargs
     ):
-
         self.supports_masking = True
         self.init = initializers.get("glorot_uniform")
         self.W_regularizer = regularizers.get(W_regularizer)
2 changes: 1 addition & 1 deletion dlomix/losses/__init__.py
@@ -1,3 +1,3 @@
-from .intensity import masked_spectral_distance, masked_pearson_correlation_distance
+from .intensity import masked_pearson_correlation_distance, masked_spectral_distance

 __all__ = [masked_spectral_distance, masked_pearson_correlation_distance]
4 changes: 2 additions & 2 deletions dlomix/losses/intensity.py
@@ -40,7 +40,7 @@ def masked_pearson_correlation_distance(y_true, y_pred):

     mx = tf.math.reduce_mean(true_masked)
     my = tf.math.reduce_mean(pred_masked)
-    xm, ym = true_masked-mx, pred_masked-my
+    xm, ym = true_masked - mx, pred_masked - my
     r_num = tf.math.reduce_mean(tf.multiply(xm, ym))
     r_den = tf.math.reduce_std(xm) * tf.math.reduce_std(ym)
-    return 1 - (r_num/r_den)
+    return 1 - (r_num / r_den)
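
For context, this loss is one minus the Pearson correlation of the masked intensity vectors, so perfectly correlated inputs give a distance near zero. A small sketch (the tensor values are made up, and it assumes the masking convention leaves these entries untouched):

```python
# Sketch: the loss returns 1 - r, where r is the Pearson correlation
# between the masked true and predicted intensity vectors.
import tensorflow as tf
from dlomix.losses import masked_pearson_correlation_distance

y_true = tf.constant([[0.1, 0.5, 0.4, 0.0]])
y_pred = tf.constant([[0.1, 0.5, 0.4, 0.0]])  # identical, so r = 1
print(masked_pearson_correlation_distance(y_true, y_pred))  # ~0.0
```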
15 changes: 10 additions & 5 deletions dlomix/models/base.py
@@ -1,17 +1,22 @@
 import tensorflow as tf
 from tensorflow.keras.layers.experimental import preprocessing
+
 from dlomix.constants import ALPHABET_UNMOD


 class RetentionTimePredictor(tf.keras.Model):
-    """A simple class for Retention Time prediction models.
+    r"""A simple class for Retention Time prediction models.

     Parameters
     ----------
-    embedding_dim (int, optional): Dimensionality of the embeddings to be used for representing the Amino Acids. Defaults to 16.
-    seq_length (int, optional): Sequence length of the peptide sequences. Defaults to 30.
-    encoder (str, optional): String for specifying the decoder to use, either based on 1D conv-layers or LSTMs. Defaults to "conv1d".
-    vocab_dict (dict, optional): Dictionary mapping for the vocabulary (the amino acids in this case). Defaults to ALPHABET_UNMOD.
+    embedding_dim: int, optional
+        Dimensionality of the embeddings to be used for representing the Amino Acids. Defaults to ``16``.
+    seq_length: int, optional
+        Sequence length of the peptide sequences. Defaults to ``30``.
+    encoder: str, optional
+        String for specifying the encoder to use, either based on 1D conv-layers or LSTMs. Defaults to ``conv1d``.
+    vocab_dict: dict, optional
+        Dictionary mapping for the vocabulary (the amino acids in this case). Defaults to ``ALPHABET_UNMOD``.
     """

     def __init__(
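
A construction sketch using the documented defaults (treat the argument values as assumptions taken from the docstring, not a verified API call):

```python
# Hedged sketch: constructing the predictor with its documented defaults.
from dlomix.models.base import RetentionTimePredictor

model = RetentionTimePredictor(
    embedding_dim=16,  # documented default
    seq_length=30,     # documented default
    encoder="conv1d",  # documented default; the docstring also mentions LSTMs
)
```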
5 changes: 2 additions & 3 deletions dlomix/models/deepLC.py
@@ -1,7 +1,8 @@
 import tensorflow as tf
-from dlomix.constants import ALPHABET_UNMOD
 from tensorflow.keras.layers.experimental import preprocessing

+from dlomix.constants import ALPHABET_UNMOD
+

 class DeepLCRetentionTimePredictor(tf.keras.Model):
     def __init__(
@@ -38,7 +39,6 @@ def _build_aminoacid_branch(self):
         )

     def _build_diaminoacid_branch(self):
-
         self.diaminoacid_branch = tf.keras.Sequential(
             [
                 self._build_conv_pool_block(n_filters=128, kernel=2, padding="same"),
@@ -87,7 +87,6 @@ def _build_conv_pool_block(
         pool_strides=2,
         pool_size=2,
     ):
-
         # leaky relu by default
         activation_fn = self.leaky_relu
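
The branches above are assembled from a shared conv + pool building block with a leaky ReLU activation; a hedged sketch of that pattern (layer arguments beyond those visible in the diff are assumptions):

```python
# Sketch of the conv + pool block pattern used by the DeepLC branches;
# parameter names mirror _build_conv_pool_block from the diff above.
import tensorflow as tf

def conv_pool_block(n_filters=128, kernel=2, padding="same",
                    pool_size=2, pool_strides=2):
    return tf.keras.Sequential([
        tf.keras.layers.Conv1D(
            n_filters, kernel, padding=padding,
            activation=tf.keras.layers.LeakyReLU(),  # leaky relu by default
        ),
        tf.keras.layers.MaxPool1D(pool_size=pool_size, strides=pool_strides),
    ])
```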
