Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename pkg from vaep to pimmslearn #74

Merged
merged 13 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
channel-priority: disabled
python-version: ${{ matrix.python-version }}
environment-file: environment.yml
activate-environment: vaep
activate-environment: pimms
auto-activate-base: true
# auto-update-conda: true
- name: inspect-conda-environment
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/workflow_website.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
channel-priority: disabled
python-version: "3.8"
environment-file: environment.yml
activate-environment: vaep
activate-environment: pimms
auto-activate-base: true
# auto-update-conda: true
- name: Dry-run workflow
Expand Down
201 changes: 144 additions & 57 deletions README.md

Large diffs are not rendered by default.

12 changes: 3 additions & 9 deletions docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,10 @@ In order to build the docs you need to

Command to be run from `path/to/pimms/docs`, i.e. from within the `docs` package folder:

```bash
# pip install pimms[docs]
# pwd: ./vaep/docs
conda env update -f environment.yml
```

If you prefer pip, run
Install pimms-learn with docs option locally

```bash
# pwd: ./vaep
# pwd: ./pimms
pip install .[docs]
```

Expand All @@ -31,7 +25,7 @@ Options:
```bash
# pwd: ./pimms/docs
# apidoc
sphinx-apidoc --force --implicit-namespaces --module-first -o reference ../vaep
sphinx-apidoc --force --implicit-namespaces --module-first -o reference ../pimmslearn
# build docs
sphinx-build -n -W --keep-going -b html ./ ./_build/
```
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@
from pathlib import Path

PROJECT_ROOT = Path(__file__).parent.parent
PACKAGE_ROOT = PROJECT_ROOT / "vaep"
PACKAGE_ROOT = PROJECT_ROOT / "pimmslearn"

def run_apidoc(_):
from sphinx.ext import apidoc
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Dev Environment
name: vaep
name: pimms
channels:
- conda-forge
- pytorch
Expand Down
4 changes: 2 additions & 2 deletions vaep/README.md → pimmslearn/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

## Imputation
- imputation of data is done based on the standard variation or KNN imputation
- adapted scripts from Annelaura are under `vaep/imputation.py`
- adapted scripts from Annelaura are under `pimmslearn/imputation.py`

## Transform
- transformation of intensity data is in `vaep/transfrom.py`
- transformation of intensity data is in `pimmslearn/transform.py`


## Utils
Expand Down
23 changes: 15 additions & 8 deletions vaep/__init__.py → pimmslearn/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
"""
VAEP
Variatonal autoencoder for proteomics
pimmslearn: a package for imputation using self-supervised deep learning models:

1. Collaborative Filtering
2. Denoising Autoencoder
3. Variational Autoencoder

The package offers Imputation transformers in the style of scikit-learn.

PyPI package is called pimms-learn (with a hyphen).
"""
from __future__ import annotations

Expand All @@ -10,18 +17,18 @@

import njab

import vaep.logging
import vaep.nb
import vaep.pandas
import vaep.plotting
import pimmslearn.logging
import pimmslearn.nb
import pimmslearn.pandas
import pimmslearn.plotting

_logging.getLogger(__name__).addHandler(_logging.NullHandler())


# put into some pandas_cfg.py file and import all


savefig = vaep.plotting.savefig
savefig = pimmslearn.plotting.savefig

__license__ = 'GPLv3'
__version__ = metadata.version("pimms-learn")
Expand All @@ -33,4 +40,4 @@

njab.pandas.set_pandas_number_formatting(float_format='{:,.3f}')

vaep.plotting.make_large_descriptors('x-large')
pimmslearn.plotting.make_large_descriptors('x-large')
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"""
from types import SimpleNamespace

from vaep.analyzers import compare_predictions, diff_analysis
from pimmslearn.analyzers import compare_predictions, diff_analysis

__all__ = ['diff_analysis', 'compare_predictions', 'Analysis']

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
from njab.sklearn import run_pca
from sklearn.impute import SimpleImputer

import vaep
from vaep.analyzers import Analysis
from vaep.io.datasplits import long_format, wide_format
from vaep.io.load import verify_df
from vaep.pandas import _add_indices
import pimmslearn
from pimmslearn.analyzers import Analysis
from pimmslearn.io.datasplits import long_format, wide_format
from pimmslearn.io.load import verify_df
from pimmslearn.pandas import _add_indices

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -379,7 +379,7 @@ def _plot(self, fct, meta_key: str, save: bool = True):
meta=meta_data.loc[self.latent_reduced.index],
title=f'{self.model_name} latent space PCA of {self.latent_dim} dimensions by {meta_key}')
if save:
vaep.plotting._savefig(fig, name=f'{self.model_name}_latent_by_{meta_key}',
pimmslearn.plotting._savefig(fig, name=f'{self.model_name}_latent_by_{meta_key}',
folder=self.folder)
return fig, ax

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions vaep/io/__init__.py → pimmslearn/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy as np
import pandas as pd

import vaep.pandas
import pimmslearn.pandas

PathsList = namedtuple('PathsList', ['files', 'folder'])

Expand Down Expand Up @@ -86,7 +86,7 @@ def get_fname_from_keys(keys, folder='.', file_ext='.pkl', remove_duplicates=Tru
keys = list(dict.fromkeys(keys))
folder = Path(folder)
folder.mkdir(exist_ok=True, parents=True)
fname_dataset = folder / '{}{}'.format(vaep.pandas.replace_with(
fname_dataset = folder / '{}{}'.format(pimmslearn.pandas.replace_with(
' '.join(keys), replace='- ', replace_with='_'), file_ext)
return fname_dataset

Expand Down
12 changes: 6 additions & 6 deletions vaep/io/dataloaders.py → pimmslearn/io/dataloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
from fastai.data.load import DataLoader
from torch.utils.data import Dataset

from vaep.io import datasets
from vaep.io.datasets import DatasetWithTarget
from vaep.transform import VaepPipeline
from pimmslearn.io import datasets
from pimmslearn.io.datasets import DatasetWithTarget
from pimmslearn.transform import VaepPipeline


def get_dls(train_X: pandas.DataFrame,
Expand Down Expand Up @@ -42,8 +42,8 @@ def get_dls(train_X: pandas.DataFrame,
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

from vaep.dataloader import get_dls
from vaep.transform import VaepPipeline
from pimmslearn.io.dataloaders import get_dls
from pimmslearn.transform import VaepPipeline

dae_default_pipeline = sklearn.pipeline.Pipeline(
[('normalize', StandardScaler()),
Expand Down Expand Up @@ -86,7 +86,7 @@ def get_test_dl(df: pandas.DataFrame,
----------
df : pandas.DataFrame
Test data in a DataFrame
transformer : vaep.transform.VaepPipeline
transformer : pimmslearn.transform.VaepPipeline
Pipeline with separate encode and decode
dataset : torch.utils.data.Dataset, optional
torch Dataset to yield encoded samples, by default DatasetWithTarget
Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions vaep/io/datasplits.py → pimmslearn/io/datasplits.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@

import pandas as pd

from vaep.io.format import class_full_module, classname
from vaep.pandas import interpolate
from pimmslearn.io.format import class_full_module, classname
from pimmslearn.pandas import interpolate

logger = logging.getLogger(__name__)

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion vaep/logging.py → pimmslearn/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def setup_logger_w_file(logger, level=logging.INFO, fname_base=None):
Examples
--------
>>> import logging
>>> logger = logging.getLogger('vaep')
>>> logger = logging.getLogger('pimmslearn')
>>> _ = setup_logger_w_file(logger) # no logging to file
>>> logger.handlers = [] # reset logger
>>> _ = setup_logger_w_file() #
Expand Down
File renamed without changes.
10 changes: 5 additions & 5 deletions vaep/models/__init__.py → pimmslearn/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
from fastai import learner
from fastcore.foundation import L

import vaep
from vaep.models import ae, analysis, collab, vae
import pimmslearn
from pimmslearn.models import ae, analysis, collab, vae

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -91,7 +91,7 @@ def plot_training_losses(learner: learner.Learner,
norm_train=norm_train, norm_val=norm_val)
name = name.lower()
_ = RecorderDump(learner.recorder, name).save(folder)
vaep.savefig(fig, name=f'{name}_training',
pimmslearn.savefig(fig, name=f'{name}_training',
folder=folder)
return fig

Expand Down Expand Up @@ -218,7 +218,7 @@ def collect_metrics(metrics_jsons: List, key_fct: Callable) -> dict:
logger.debug(f"{key = }")
with open(fname) as f:
loaded = json.load(f)
loaded = vaep.pandas.flatten_dict_of_dicts(loaded)
loaded = pimmslearn.pandas.flatten_dict_of_dicts(loaded)

if key not in all_metrics:
all_metrics[key] = loaded
Expand Down Expand Up @@ -320,7 +320,7 @@ def get_df_from_nested_dict(nested_dict,
row_name='subset'):
metrics = {}
for k, run_metrics in nested_dict.items():
metrics[k] = vaep.pandas.flatten_dict_of_dicts(run_metrics)
metrics[k] = pimmslearn.pandas.flatten_dict_of_dicts(run_metrics)

metrics = pd.DataFrame.from_dict(metrics, orient='index')
metrics.columns.names = column_levels
Expand Down
37 changes: 18 additions & 19 deletions vaep/models/ae.py → pimmslearn/models/ae.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Autoencoder model trained using denoising procedure.

Variational Autencoder model adapter should be moved to vaep.models.vae.
Variational Autoencoder model adapter should be moved to pimmslearn.models.vae.
Or model class could be put somewhere else.
"""
import logging
Expand All @@ -15,22 +15,21 @@
from fastai.callback.core import Callback
from torch import nn

import vaep.io.dataloaders
import vaep.io.datasets
import vaep.io.datasplits
import vaep.models
import vaep.transform

from vaep.models import analysis
import pimmslearn.io.dataloaders
import pimmslearn.io.datasets
import pimmslearn.io.datasplits
import pimmslearn.models
import pimmslearn.transform
from pimmslearn.models import analysis

logger = logging.getLogger(__name__)


def get_preds_from_df(df: pd.DataFrame,
learn: fastai.learner.Learner,
transformer: vaep.transform.VaepPipeline,
transformer: pimmslearn.transform.VaepPipeline,
position_pred_tuple: int = None,
dataset: torch.utils.data.Dataset = vaep.io.datasets.DatasetWithTarget):
dataset: torch.utils.data.Dataset = pimmslearn.io.datasets.DatasetWithTarget):
"""Get predictions for specified DataFrame, using a fastai learner
and a custom sklearn Pipeline.

Expand All @@ -40,22 +39,22 @@ def get_preds_from_df(df: pd.DataFrame,
DataFrame to create predictions from.
learn : fastai.learner.Learner
fastai Learner with trained model
transformer : vaep.transform.VaepPipeline
transformer : pimmslearn.transform.VaepPipeline
Pipeline with separate encode and decode
position_pred_tuple : int, optional
In case the model returns multiple outputs, select the one which contains
the predictions matching the target variable (VAE case), by default None
dataset : torch.utils.data.Dataset, optional
Dataset to build batches from, by default vaep.io.datasets.DatasetWithTarget
Dataset to build batches from, by default pimmslearn.io.datasets.DatasetWithTarget

Returns
-------
tuple
tuple of pandas DataFrames (prediction and target) based on learn.get_preds
"""
dl = vaep.io.dataloaders.get_test_dl(df=df,
transformer=transformer,
dataset=dataset)
dl = pimmslearn.io.dataloaders.get_test_dl(df=df,
transformer=transformer,
dataset=dataset)
res = learn.get_preds(dl=dl) # -> dl could be int
if position_pred_tuple is not None and issubclass(type(res[0]), tuple):
res = (res[0][position_pred_tuple], *res[1:])
Expand Down Expand Up @@ -272,11 +271,11 @@ def __init__(self,
decode: List[str],
bs=64
):
self.transform = vaep.transform.VaepPipeline(
self.transform = pimmslearn.transform.VaepPipeline(
df_train=train_df,
encode=transform,
decode=decode)
self.dls = vaep.io.dataloaders.get_dls(
self.dls = pimmslearn.io.dataloaders.get_dls(
train_X=train_df,
valid_X=val_df,
transformer=self.transform, bs=bs)
Expand All @@ -286,7 +285,7 @@ def __init__(self,
self.params = dict(self.kwargs_model)
self.model = model(**self.kwargs_model)

self.n_params_ae = vaep.models.calc_net_weight_count(self.model)
self.n_params_ae = pimmslearn.models.calc_net_weight_count(self.model)
self.params['n_parameters'] = self.n_params_ae
self.learn = None

Expand All @@ -296,4 +295,4 @@ def get_preds_from_df(self, df_wide: pd.DataFrame) -> pd.DataFrame:
return get_preds_from_df(df=df_wide, learn=self.learn, transformer=self.transform)

def get_test_dl(self, df_wide: pd.DataFrame, bs: int = 64) -> pd.DataFrame:
return vaep.io.dataloaders.get_test_dl(df=df_wide, transformer=self.transform, bs=bs)
return pimmslearn.io.dataloaders.get_test_dl(df=df_wide, transformer=self.transform, bs=bs)
6 changes: 3 additions & 3 deletions vaep/models/analysis.py → pimmslearn/models/analysis.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import vaep.transform
import pimmslearn.transform
import torch.nn
import fastai.data.core
import fastai.learner

from vaep.analyzers import Analysis
from pimmslearn.analyzers import Analysis


class ModelAnalysis(Analysis):
Expand All @@ -13,4 +13,4 @@ class ModelAnalysis(Analysis):
dls: fastai.data.core.DataLoaders
learn: fastai.learner.Learner
params: dict
transform: vaep.transform.VaepPipeline
transform: pimmslearn.transform.VaepPipeline
Loading
Loading