From 77310272e3b755d1a263c0162fbb421f78305fb7 Mon Sep 17 00:00:00 2001
From: hy395
Date: Mon, 11 Dec 2023 15:29:17 -0800
Subject: [PATCH] add ignore

---
 .gitignore                                   |    2 +
 build/lib/scnym/__init__.py                  |    8 -
 build/lib/scnym/__main__.py                  |    3 -
 build/lib/scnym/api.py                       | 1515 --------------
 build/lib/scnym/attributionpriors.py         |  605 ------
 build/lib/scnym/dataprep.py                  |  765 -------
 build/lib/scnym/distributions.py             |  420 ----
 build/lib/scnym/interpret.py                 | 1368 ------------
 build/lib/scnym/losses.py                    | 1838 -----------------
 build/lib/scnym/main.py                      | 1678 ---------------
 build/lib/scnym/model.py                     |  603 ------
 build/lib/scnym/predict.py                   |  216 --
 build/lib/scnym/scnym_ad.py                  |  217 --
 build/lib/scnym/trainer.py                   | 1412 -------------
 build/lib/scnym/utils.py                     |  743 -------
 scnym.egg-info/PKG-INFO                      |   46 -
 scnym.egg-info/SOURCES.txt                   |   60 -
 scnym.egg-info/dependency_links.txt          |    1 -
 scnym.egg-info/entry_points.txt              |    3 -
 scnym.egg-info/requires.txt                  |   31 -
 scnym.egg-info/top_level.txt                 |    1 -
 scnym/__pycache__/__init__.cpython-38.pyc    |  Bin 452 -> 0 bytes
 scnym/__pycache__/api.cpython-38.pyc         |  Bin 34712 -> 0 bytes
 .../attributionpriors.cpython-38.pyc         |  Bin 18470 -> 0 bytes
 scnym/__pycache__/dataprep.cpython-38.pyc    |  Bin 17744 -> 0 bytes
 .../__pycache__/distributions.cpython-38.pyc |  Bin 12479 -> 0 bytes
 scnym/__pycache__/interpret.cpython-38.pyc   |  Bin 35948 -> 0 bytes
 scnym/__pycache__/losses.cpython-38.pyc      |  Bin 42806 -> 0 bytes
 scnym/__pycache__/main.cpython-38.pyc        |  Bin 32946 -> 0 bytes
 scnym/__pycache__/model.cpython-38.pyc       |  Bin 14339 -> 0 bytes
 scnym/__pycache__/predict.cpython-38.pyc     |  Bin 4950 -> 0 bytes
 scnym/__pycache__/trainer.cpython-38.pyc     |  Bin 27471 -> 0 bytes
 scnym/__pycache__/utils.cpython-38.pyc       |  Bin 18831 -> 0 bytes
 33 files changed, 2 insertions(+), 11533 deletions(-)
 delete mode 100644 build/lib/scnym/__init__.py
 delete mode 100644 build/lib/scnym/__main__.py
 delete mode 100644 build/lib/scnym/api.py
 delete mode 100644 build/lib/scnym/attributionpriors.py
 delete mode 100644 build/lib/scnym/dataprep.py
 delete mode 100644 build/lib/scnym/distributions.py
 delete mode 100644 build/lib/scnym/interpret.py
 delete mode 100644 build/lib/scnym/losses.py
 delete mode 100644 build/lib/scnym/main.py
 delete mode 100644 build/lib/scnym/model.py
 delete mode 100644 build/lib/scnym/predict.py
 delete mode 100644 build/lib/scnym/scnym_ad.py
 delete mode 100644 build/lib/scnym/trainer.py
 delete mode 100644 build/lib/scnym/utils.py
 delete mode 100644 scnym.egg-info/PKG-INFO
 delete mode 100644 scnym.egg-info/SOURCES.txt
 delete mode 100644 scnym.egg-info/dependency_links.txt
 delete mode 100644 scnym.egg-info/entry_points.txt
 delete mode 100644 scnym.egg-info/requires.txt
 delete mode 100644 scnym.egg-info/top_level.txt
 delete mode 100644 scnym/__pycache__/__init__.cpython-38.pyc
 delete mode 100644 scnym/__pycache__/api.cpython-38.pyc
 delete mode 100644 scnym/__pycache__/attributionpriors.cpython-38.pyc
 delete mode 100644 scnym/__pycache__/dataprep.cpython-38.pyc
 delete mode 100644 scnym/__pycache__/distributions.cpython-38.pyc
 delete mode 100644 scnym/__pycache__/interpret.cpython-38.pyc
 delete mode 100644 scnym/__pycache__/losses.cpython-38.pyc
 delete mode 100644 scnym/__pycache__/main.cpython-38.pyc
 delete mode 100644 scnym/__pycache__/model.cpython-38.pyc
 delete mode 100644 scnym/__pycache__/predict.cpython-38.pyc
 delete mode 100644 scnym/__pycache__/trainer.cpython-38.pyc
 delete mode 100644 scnym/__pycache__/utils.cpython-38.pyc

diff --git a/.gitignore b/.gitignore
index f16cf9f..b0f5e9b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 dist/
 **/__pycache__/
 .ipynb_checkpoints/
+build/
+*.egg-info/
diff --git a/build/lib/scnym/__init__.py b/build/lib/scnym/__init__.py
deleted file mode 100644
index 14f386b..0000000
--- a/build/lib/scnym/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-__author__ = "Jacob C. Kimmel, David R. Kelley"
-__email__ = "jacobkimmel+scnym@gmail.com, drk@calicolabs.com"
-__version__ = "0.3.4"
-
-# populate the namespace so top level imports work
-# e.g.
-# >> from scnym.model import CellTypeCLF
-from . import api, main, dataprep, interpret, model, predict, trainer, utils
diff --git a/build/lib/scnym/__main__.py b/build/lib/scnym/__main__.py
deleted file mode 100644
index 0eccd5e..0000000
--- a/build/lib/scnym/__main__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from scnym.main import main
-
-main()
diff --git a/build/lib/scnym/api.py b/build/lib/scnym/api.py
deleted file mode 100644
index d39dc54..0000000
--- a/build/lib/scnym/api.py
+++ /dev/null
@@ -1,1515 +0,0 @@
-"""
-Classify cell identities using scNym
-
-scnym_api() is the main API endpoint for users.
-This function allows for training and prediction using scnym_train()
-and scnym_predict(). Both of these functions will be infrequently
-accessed by users.
-
-get_pretrained_weights() is a wrapper function that downloads pretrained
-weights from our cloud storage bucket.
-atlas2target() downloads preprocessed reference datasets and concatenates
-them onto a user supplied target dataset.
-"""
-from typing import Optional, Union, List, Tuple
-from anndata import AnnData
-import scanpy as sc
-import numpy as np
-import pandas as pd
-import torch
-import os
-import os.path as osp
-import copy
-import pickle
-import warnings
-import itertools
-import pprint
-import logging
-import tqdm
-
-# for fetching pretrained weights, all in standard lib
-import requests
-import json
-import urllib
-
-# for data splits
-from sklearn.model_selection import StratifiedKFold
-
-# from scnym
-from . import utils
-from . import model
-from . import main
-from . import predict
-from . import dataprep
-from . 
import interpret - -# Define constants - -logger = logging.getLogger(__name__) - -TEST_URL = ( - "https://storage.googleapis.com/calico-website-mca-storage/kang_2017_stim_pbmc.h5ad" -) -WEIGHTS_JSON = "https://storage.googleapis.com/calico-website-scnym-storage/link_tables/pretrained_weights.json" -REFERENCE_JSON = "https://storage.googleapis.com/calico-website-scnym-storage/link_tables/cell_atlas.json" - -ATLAS_ANNOT_KEYS = { - "human": "celltype", - "mouse": "cell_ontology_class", - "rat": "cell_ontology_class", -} - -TASKS = ( - "train", - "predict", -) - -# Define configurations - -CONFIGS = { - "default": { - "n_epochs": 100, - "patience": 40, - "lr": 1.0, - "optimizer_name": "adadelta", - "weight_decay": 1e-4, - "batch_size": 256, - "balanced_classes": False, - "weighted_classes": False, - "mixup_alpha": 0.3, - "unsup_max_weight": 1.0, - "unsup_mean_teacher": False, - "ssl_method": "mixmatch", - "ssl_kwargs": { - "augment_pseudolabels": False, - "augment": "log1p_drop", - "unsup_criterion": "mse", - "n_augmentations": 1, - "T": 0.5, - "ramp_epochs": 100, - "burn_in_epochs": 0, - "dan_criterion": True, - "dan_ramp_epochs": 20, - "dan_max_weight": 0.1, - "min_epochs": 20, - }, - "model_kwargs": { - "n_hidden": 256, - "n_layers": 2, - "init_dropout": 0.0, - "residual": False, - }, - "tensorboard": False, - }, -} - -CONFIGS["no_new_identity"] = copy.deepcopy(CONFIGS["default"]) -CONFIGS["no_new_identity"][ - "description" -] = "Train scNym models with MixMatch and a domain adversary, assuming no new cell types in the target data." - -CONFIGS["new_identity_discovery"] = copy.deepcopy(CONFIGS["default"]) -CONFIGS["new_identity_discovery"]["ssl_kwargs"]["pseudolabel_min_confidence"] = 0.9 -CONFIGS["new_identity_discovery"]["ssl_kwargs"]["dan_use_conf_pseudolabels"] = True -CONFIGS["new_identity_discovery"][ - "description" -] = "Train scNym models with MixMatch and a domain adversary, using pseudolabel thresholding to allow for new cell type discoveries." - -CONFIGS["no_dan"] = copy.deepcopy(CONFIGS["default"]) -CONFIGS["no_dan"]["ssl_kwargs"]["dan_max_weight"] = 0.0 -CONFIGS["no_dan"]["ssl_kwargs"]["dan_ramp_epochs"] = 1 -CONFIGS["no_dan"][ - "description" -] = "Train scNym models with MixMatch but no domain adversary. May be useful if class imbalance is very large." - -CONFIGS["no_ssl"] = copy.deepcopy(CONFIGS["default"]) -CONFIGS["no_ssl"]["ssl_kwargs"]["dan_max_weight"] = 0.0 -CONFIGS["no_ssl"]["ssl_kwargs"]["dan_ramp_epochs"] = 1 -CONFIGS["no_ssl"]["ssl_kwargs"]["unsup_max_weight"] = 0.0 -CONFIGS["no_ssl"][ - "description" -] = "Train scNym models with MixMatch but no domain adversary. May be useful if class imbalance is very large." - - -UNLABELED_TOKEN = "Unlabeled" - - -def scnym_api( - adata: AnnData, - task: str = "train", - groupby: str = None, - domain_groupby: str = None, - out_path: str = "./scnym_outputs", - trained_model: str = None, - config: Union[dict, str] = "new_identity_discovery", - key_added: str = "scNym", - copy: bool = False, - **kwargs, -) -> Optional[AnnData]: - """ - scNym: Semi-supervised adversarial neural networks for - single cell classification [Kimmel2020]_. - - scNym is a cell identity classifier that transfers annotations from one - single cell experiment to another. The model is implemented as a neural - network that employs MixMatch semi-supervision and a domain adversary to - take advantage of unlabeled data during training. scNym offers superior - performance to many baseline single cell identity classification methods. 
- - Parameters - ---------- - adata - Annotated data matrix used for training or prediction. - If `"scNym_split"` in `.obs_keys()`, uses the cells annotated - `"train", "val"` to select data splits. - task - Task to perform, either "train" or "predict". - If "train", uses `adata` as labeled training data. - If "predict", uses `trained_model` to infer cell identities for - observations in `adata`. - groupby - Column in `adata.obs` that contains cell identity annotations. - Values of `"Unlabeled"` indicate that a given cell should be used - only as unlabeled data during training. - domain_groupby - Column in `adata.obs` that contains domain labels as integers. - Each domain of origin (e.g. batch, species) should be given a unique - domain label. - If `domain_groupby is None`, train and target data are each considered - a unique domain. - out_path - Path to a directory for saving scNym model weights and training logs. - trained_model - Path to the output directory of an scNym training run - or a string specifying a pretrained model. - If provided while `task == "train"`, used as an initialization. - config - Configuration name or dictionary of configuration of parameters. - Pre-defined configurations: - "new_identity_discovery" - Default. Employs pseudolabel thresholding to - allow for discovery of new cell identities in the target dataset using - scNym confidence scores. - "no_new_identity" - Assumes all cells in the target data belong to one - of the classes in the training data. Recommended to improve performance - when this assumption is valid. - key_added - Key added to `adata.obs` with scNym predictions if `task=="predict"`. - copy - copy the AnnData object before predicting cell types. - - Returns - ------- - Depending on `copy`, returns or updates `adata` with the following fields. - - `X_scnym` : :class:`~numpy.ndarray`, (:attr:`~anndata.AnnData.obsm`, shape=(n_samples, n_hidden), dtype `float`) - scNym embedding coordinates of data. - `scNym` : (`adata.obs`, dtype `str`) - scNym cell identity predictions for each observation. - `scNym_train_results` : :class:`~dict`, (:attr:`~anndata.AnnData.uns`) - results of scNym model training. - - Examples - -------- - >>> import scanpy as sc - >>> from scnym.api import scnym_api, atlas2target - - **Loading Data and preparing labels** - - >>> adata = sc.datasets.kang17() - >>> target_bidx = adata.obs['stim']=='stim' - >>> adata.obs['cell'] = np.array(adata.obs['cell']) - >>> adata.obs.loc[target_bidx, 'cell'] = 'Unlabeled' - - **Train an scNym model** - - >>> scnym_api( - ... adata=adata, - ... task='train', - ... groupby='clusters', - ... out_path='./scnym_outputs', - ... config='no_new_identity', - ... ) - - **Predict cell identities with the trained scNym model** - - >>> path_to_model = './scnym_outputs/' - >>> scnym_api( - ... adata=adata, - ... task='predict', - ... groupby='scNym', - ... trained_model=path_to_model, - ... config='no_new_identity', - ... ) - - **Perform semi-supervised training with an atlas** - - >>> joint_adata = atlas2target( - ... adata=adata, - ... species='mouse', - ... key_added='annotations', - ... ) - >>> scnym_api( - ... adata=joint_adata, - ... task='train', - ... groupby='annotations', - ... out_path='./scnym_outputs', - ... config='no_new_identity', - ... 
) - """ - if task not in TASKS: - msg = f"{task} is not a valid scNym task.\n" - msg += f"must be one of {TASKS}" - raise ValueError(msg) - - # check configuration arguments and choose a config - if type(config) == str: - if config not in CONFIGS.keys(): - msg = f"{config} is not a predefined configuration.\n" - msg += f"must be one of {CONFIGS.keys()}." - raise ValueError(msg) - else: - config = CONFIGS[config] - elif type(config) != dict: - msg = f"`config` was a {type(config)}, must be dict or str." - raise TypeError(msg) - else: - # config is a dictionary of parameters - # add or update default parameters based on these - dconf = CONFIGS["default"] - for k in config.keys(): - dconf[k] = config[k] - config = dconf - logger.debug(f"Finalized config: {config}") - - # check for CUDA - if torch.cuda.is_available(): - print("CUDA compute device found.") - else: - print("No CUDA device found.") - print("Computations will be performed on the CPU.") - print("Add a CUDA compute device to improve speed dramatically.\n") - - if not osp.exists(out_path): - os.makedirs(out_path, exist_ok=True) - - # add args to `config` - config["out_path"] = out_path - config["groupby"] = groupby - config["key_added"] = key_added - config["trained_model"] = trained_model - config["domain_groupby"] = domain_groupby - - ################################################ - # check that there are no duplicate genes in the input object - ################################################ - n_genes = adata.shape[1] - n_unique_genes = len(np.unique(adata.var_names)) - if n_genes != n_unique_genes: - msg = "Duplicate Genes Error\n" - msg += "Not all genes passed to scNym were unique.\n" - msg += f"{n_genes} genes are present but only {n_unique_genes} unique genes were detected.\n" - msg += "Please use unique gene names in your input object.\n" - msg += "This can be achieved by running `adata.var_names_make_unique()`" - raise ValueError(msg) - - ################################################ - # check that `adata.X` are log1p(CPM) counts - ################################################ - # we can't directly check if cells were normalized to CPM because - # users may have filtered out genes *a priori*, so the cell sum - # may no longer be ~= 1e6. - # however, we can check that our assumptions about log normalization - # are true. 
- - # check that the min/max are within log1p(CPM) range - x_max = np.max(adata.X) > np.log1p(1e6) - x_min = np.min(adata.X) < 0.0 - - # check to see if a user accidently provided raw counts - if type(adata.X) == np.ndarray: - int_counts = np.all(np.equal(np.mod(adata.X, 1), 0)) - else: - int_counts = np.all(np.equal(np.mod(adata.X.data, 1), 0)) - - if x_max or x_min or int_counts: - msg = "Normalization error\n" - msg += ( - "`adata.X` does not appear to be log(CountsPerMillion+1) normalized data.\n" - ) - msg += "Please replace `adata.X` with log1p(CPM) values.\n" - msg += ">>> # starting from raw counts in `adata.X`\n" - msg += ">>> sc.pp.normalize_total(adata, target_sum=1e6))\n" - msg += ">>> sc.pp.log1p(adata)" - raise ValueError(msg) - - ################################################ - # check inputs and launch the appropriate task - ################################################ - - if task == "train": - # pass parameters to training routine - if groupby not in adata.obs.columns: - msg = f"{groupby} is not a variable in `adata.obs`" - raise ValueError(msg) - - scnym_train( - adata=adata, - config=config, - ) - elif task == "predict": - # check that a pre-trained model was specified or - # provided for prediction - if trained_model is None: - msg = "must provide a path to a trained model for prediction." - raise ValueError(msg) - if not os.path.exists(trained_model) and "pretrained_" not in trained_model: - msg = "path to the trained model does not exist." - raise FileNotFoundError(msg) - # predict identities - config["model_weights"] = trained_model - scnym_predict( - adata=adata, - config=config, - ) - - elif task == "interpret": - - scnym_interpret( - adata=adata, - config=config, - **kwargs, - ) - - else: - msg = f"{task} is not a valid task." - raise ValueError(msg) - - return - - -def scnym_train( - adata: AnnData, - config: dict, -) -> None: - """Train an scNym model. - - Parameters - ---------- - adata : AnnData - [Cells, Genes] experiment containing annotated - cells to train on. - config : dict - configuration options. - - Returns - ------- - None. - Saves model outputs to `config["out_path"]` and adds model results - to `adata.uns["scnym_train_results"]`. - - Notes - ----- - This method should only be directly called by advanced users. - Most users should use `scnym_api`. - - See Also - -------- - scnym_api - """ - # determine if unlabeled examples are present - n_unlabeled = np.sum(adata.obs[config["groupby"]] == UNLABELED_TOKEN) - if n_unlabeled == 0: - print("No unlabeled data was found.") - print(f'Did you forget to set some examples as `"{UNLABELED_TOKEN}"`?') - print("Proceeding with purely supervised training.") - print() - - unlabeled_counts = None - unlabeled_genes = None - - X = utils.get_adata_asarray(adata) - y = pd.Categorical( - np.array(adata.obs[config["groupby"]]), - categories=np.unique(adata.obs[config["groupby"]]), - ).codes - class_names = np.unique(adata.obs[config["groupby"]]) - # set all samples for training - train_adata = adata - # set no samples as `target_bidx` - target_bidx = np.zeros(adata.shape[0], dtype=np.bool) - else: - print(f"{n_unlabeled} unlabeled observations found.") - print( - "Using unlabeled data as a target set for semi-supervised, adversarial training." 
- ) - print() - - target_bidx = adata.obs[config["groupby"]] == UNLABELED_TOKEN - - train_adata = adata[~target_bidx, :] - target_adata = adata[target_bidx, :] - - print("training examples: ", train_adata.shape) - print("target examples: ", target_adata.shape) - - X = utils.get_adata_asarray(train_adata) - y = pd.Categorical( - np.array(train_adata.obs[config["groupby"]]), - categories=np.unique(train_adata.obs[config["groupby"]]), - ).codes - unlabeled_counts = utils.get_adata_asarray(target_adata) - class_names = np.unique(train_adata.obs[config["groupby"]]) - - print("X: ", X.shape) - print("y: ", y.shape) - - if "scNym_split" not in adata.obs_keys(): - # perform a 90/10 train test split - traintest_idx = np.random.choice( - X.shape[0], size=int(np.floor(0.9 * X.shape[0])), replace=False - ) - val_idx = np.setdiff1d(np.arange(X.shape[0]), traintest_idx) - else: - train_idx = np.where(train_adata.obs["scNym_split"] == "train")[0] - test_idx = np.where( - train_adata.obs["scNym_split"] == "test", - )[0] - val_idx = np.where(train_adata.obs["scNym_split"] == "val")[0] - - if len(train_idx) < 100 or len(test_idx) < 10 or len(val_idx) < 10: - msg = "Few samples in user provided data split.\n" - msg += f"{len(train_idx)} training samples.\n" - msg += f"{len(test_idx)} testing samples.\n" - msg += f"{len(val_idx)} validation samples.\n" - msg += "Halting." - raise RuntimeError(msg) - # `fit_model()` takes a tuple of `traintest_idx` - # as a training index and testing index pair. - traintest_idx = ( - train_idx, - test_idx, - ) - - # check if domain labels were manually specified - if config.get("domain_groupby", None) is not None: - domain_groupby = config["domain_groupby"] - # check that the column actually exists - if domain_groupby not in adata.obs.columns: - msg = f"no column `{domain_groupby}` exists in `adata.obs`.\n" - msg += "if domain labels are specified, a matching column must exist." - raise ValueError(msg) - # get the label indices as unique integers using pd.Categorical - # to code each unique label with an int - domains = np.array( - pd.Categorical( - adata.obs[domain_groupby], - categories=np.unique(adata.obs[domain_groupby]), - ).codes, - dtype=np.int32, - ) - # split domain labels into source and target sets for `fit_model` - input_domain = domains[~target_bidx] - unlabeled_domain = domains[target_bidx] - print("Using user provided domain labels.") - n_source_doms = len(np.unique(input_domain)) - n_target_doms = len(np.unique(unlabeled_domain)) - print( - f"Found {n_source_doms} source domains and {n_target_doms} target domains." - ) - else: - # no domains manually supplied, providing `None` to `fit_model` - # will treat source data as one domain and target data as another - input_domain = None - unlabeled_domain = None - - # check if pre-trained weights should be used to initialize the model - if config["trained_model"] is None: - pretrained = None - elif "pretrained_" in config["trained_model"]: - msg = "pretrained model fetching is not supported for training." - raise NotImplementedError(msg) - else: - # setup a prediction model - pretrained = osp.join( - config["trained_model"], - "00_best_model_weights.pkl", - ) - if not osp.exists(pretrained): - msg = f"{pretrained} file not found." 
- raise FileNotFoundError(msg) - - acc, loss = main.fit_model( - X=X, - y=y, - traintest_idx=traintest_idx, - val_idx=val_idx, - batch_size=config["batch_size"], - n_epochs=config["n_epochs"], - lr=config["lr"], - optimizer_name=config["optimizer_name"], - weight_decay=config["weight_decay"], - ModelClass=model.CellTypeCLF, - balanced_classes=config["balanced_classes"], - weighted_classes=config["weighted_classes"], - out_path=config["out_path"], - mixup_alpha=config["mixup_alpha"], - unlabeled_counts=unlabeled_counts, - input_domain=input_domain, - unlabeled_domain=unlabeled_domain, - unsup_max_weight=config["unsup_max_weight"], - unsup_mean_teacher=config["unsup_mean_teacher"], - ssl_method=config["ssl_method"], - ssl_kwargs=config["ssl_kwargs"], - pretrained=pretrained, - patience=config.get("patience", None), - save_freq=config.get("save_freq", None), - tensorboard=config.get("tensorboard", False), - **config["model_kwargs"], - ) - - # add the final model results to `adata` - results = { - "model_path": osp.realpath( - osp.join(config["out_path"], "00_best_model_weights.pkl") - ), - "final_acc": acc, - "final_loss": loss, - "n_genes": adata.shape[1], - "n_cell_types": len(np.unique(y)), - "class_names": class_names, - "gene_names": adata.var_names.tolist(), - "model_kwargs": config["model_kwargs"], - "traintest_idx": traintest_idx, - "val_idx": val_idx, - } - assert osp.exists(results["model_path"]) - - adata.uns["scNym_train_results"] = results - - # save the final model results to disk - train_results_path = osp.join( - config["out_path"], - "scnym_train_results.pkl", - ) - - with open(train_results_path, "wb") as f: - pickle.dump(results, f) - return - - -@torch.no_grad() -def scnym_predict( - adata: AnnData, - config: dict, -) -> None: - """Predict cell identities using an scNym model. - - Parameters - ---------- - adata : AnnData - [Cells, Genes] experiment containing annotated - cells to train on. - config : dict - configuration options. - - Returns - ------- - None. Adds `adata.obs[config["key_added"]]` and `adata.obsm["X_scnym"]`. - - Notes - ----- - This method should only be directly called by advanced users. - Most users should use `scnym_api`. 
- - See Also - -------- - scnym_api - """ - # check if a pretrained model was requested - if "pretrained_" in config["trained_model"]: - msg = "Pretrained Request Error\n" - msg += "Pretrained weights are no longer supported in scNym.\n" - raise NotImplementedError(msg) - # species = _get_pretrained_weights( - # trained_model=config['trained_model'], - # out_path=config['out_path'], - # ) - # print(f'Successfully downloaded pretrained model for {species}.') - # config['trained_model'] = config['out_path'] - - # load training parameters - with open( - osp.join(config["trained_model"], "scnym_train_results.pkl"), - "rb", - ) as f: - results = pickle.load(f) - - # setup a prediction model - model_weights_path = osp.join( - config["trained_model"], - "00_best_model_weights.pkl", - ) - - P = predict.Predicter( - model_weights=model_weights_path, - n_genes=results["n_genes"], - n_cell_types=results["n_cell_types"], - labels=results["class_names"], - **config["model_kwargs"], - ) - n_cell_types = results["n_cell_types"] - n_genes = results["n_genes"] - print(f"Loaded model predicting {n_cell_types} classes from {n_genes} features") - print(results["class_names"]) - - # Generate a classification matrix - print("Building a classification matrix...") - X_raw = utils.get_adata_asarray(adata) - X = utils.build_classification_matrix( - X=X_raw, - model_genes=np.array(results["gene_names"]), - sample_genes=np.array(adata.var_names), - ) - - # Predict cell identities - print("Predicting cell types...") - pred, names, prob = P.predict( - X, - output="prob", - ) - - prob = pd.DataFrame( - prob, - columns=results["class_names"], - index=adata.obs_names, - ) - - # Extract model embeddings - print("Extracting model embeddings...") - ds = dataprep.SingleCellDS(X=X, y=np.zeros(X.shape[0])) - dl = torch.utils.data.DataLoader( - ds, - batch_size=config["batch_size"], - shuffle=False, - ) - - model = P.models[0] - lz_02 = torch.nn.Sequential(*list(list(model.modules())[0].children())[1][:-1]) - - embeddings = [] - for data in dl: - input_ = data["input"] - input_ = input_.to(device=next(model.parameters()).device) - z = lz_02(input_) - embeddings.append(z.detach().cpu()) - Z = torch.cat(embeddings, 0) - - # Store results in the anndata object - adata.obs[config["key_added"]] = names - adata.obs[config["key_added"] + "_confidence"] = np.max(prob, axis=1) - adata.uns["scNym_probabilities"] = prob - adata.obsm["X_scnym"] = Z.numpy() - - return - - -def _get_pretrained_weights( - trained_model: str, - out_path: str, -) -> str: - """Given the name of a set of pretrained model weights, - fetch weights from GCS and return the model state dict. - - Parameters - ---------- - trained_model : str - the name of a pretrained model to use, formatted as - "pretrained_{species}". - species should be one of {"human", "mouse", "rat"}. - out_path : str - path for saving model weights and outputs. - - Returns - ------- - species : str - species parsed from the trained model name. - Saves "{out_path}/00_best_model_weights.pkl" and - "{out_path}/scnym_train_results.pkl". - - Notes - ----- - Requires an internet connection to download pre-trained weights. 
- """ - # check that the trained_model argument is valid - if "pretrained_" not in trained_model: - msg = 'pretrained model names must contain `"pretrained_"`' - raise ValueError(msg) - - species = trained_model.split("pretrained_")[1] - - # download a table of available pretrained models - try: - pretrained_weights_dict = json.loads(requests.get(WEIGHTS_JSON).text) - except requests.exceptions.ConnectionError: - print("Could not download pretrained weighs listing from:") - print(f"\t{WEIGHTS_JSON}") - print("Loading pretrained model failed.") - - # check that the species specified has pretrained weights - if species not in pretrained_weights_dict.keys(): - msg = f"pretrained weights not available for {species}." - raise ValueError(species) - - # get pretrained weights - path_for_weights = osp.join(out_path, f"00_best_model_weights.pkl") - urllib.request.urlretrieve( - pretrained_weights_dict[species], - path_for_weights, - ) - - # load model parameters - model_params = {} - urllib.request.urlretrieve( - pretrained_weights_dict["model_params"][species]["gene_names"], - osp.join(out_path, "pretrained_gene_names.csv"), - ) - urllib.request.urlretrieve( - pretrained_weights_dict["model_params"][species]["class_names"], - osp.join(out_path, "pretrained_class_names.csv"), - ) - model_params["gene_names"] = np.loadtxt( - osp.join(out_path, "pretrained_gene_names.csv"), - delimiter=",", - dtype="str", - ) - model_params["class_names"] = np.loadtxt( - osp.join(out_path, "pretrained_class_names.csv"), - delimiter=",", - dtype="str", - ) - model_params["n_genes"] = len(model_params["gene_names"]) - model_params["n_cell_types"] = len(model_params["class_names"]) - - # save model parameters to a results file in the output dir - path_for_results = f"{out_path}/scnym_train_results.pkl" - with open(path_for_results, "wb") as f: - pickle.dump(model_params, f) - - # check that files are present - if not osp.exists(path_for_weights): - raise FileNotFoundError(path_for_weights) - if not osp.exists(path_for_results): - raise FileNotFoundError(path_for_results) - - return species - - -def atlas2target( - adata: AnnData, - species: str, - key_added: str = "annotations", -) -> AnnData: - """Download a preprocessed cell atlas dataset and - append your new dataset as a target to allow for - semi-supervised scNym training. - - Parameters - ---------- - adata : anndata.AnnData - [Cells, Features] experiment to use as a target - dataset. - `adata.var_names` must be formatted as Ensembl gene - names for the relevant species to match the atlas. - e.g. `"Gapdh`" for mouse or `"GAPDH"` for human, rather - than Ensembl gene IDs or another gene annotation. - - Returns - ------- - joint_adata : anndata.AnnData - [Cells, Features] experiment concatenated with a - preprocessed cell atlas reference dataset. - Annotations from the atlas are copied to `.obs[key_added]` - and all cells in the target dataset `adata` are labeled - with the special "Unlabeled" token. - - Examples - -------- - >>> adata = sc.datasets.pbmc3k() - >>> joint_adata = scnym.api.atlas2target( - ... adata=adata, - ... species='human', - ... key_added='annotations', - ... ) - - Notes - ----- - Requires an internet connection to download reference datasets. 
- """ - # download a directory of cell atlases - try: - reference_dict = json.loads(requests.get(REFERENCE_JSON).text) - except requests.exceptions.ConnectionError: - print("Could not download pretrained weighs listing from:") - print(f"\t{REFERENCE_JSON}") - print("Loading pretrained model failed.") - - # check that the species presented is available - if species not in reference_dict.keys(): - msg = f"pretrained weights not available for {species}." - raise ValueError(species) - - # check that there are no gene duplications - n_uniq_genes = len(np.unique(adata.var_names)) - if n_uniq_genes < len(adata.var_names): - msg = f"{n_uniq_genes} unique features found, but {adata.shape[1]} features are listed.\n" - msg += "Please de-duplicate features in `adata` before joining with an atlas dataset.\n" - msg += "Consider `adata.var_names_make_unique()` or aggregating values for features with the same identifier." - raise ValueError(msg) - - # download the atlas of interest - atlas = sc.datasets._datasets.read( - sc.settings.datasetdir / f"atlas_{species}.h5ad", - backup_url=reference_dict[species], - ) - del atlas.raw - - # get the key used by the cell atlas - atlas_annot_key = ATLAS_ANNOT_KEYS[species] - - # copy atlas annotations to the specified column - atlas.obs[key_added] = np.array(atlas.obs[atlas_annot_key]) - atlas.obs["scNym_dataset"] = "atlas_reference" - - # label target data with "Unlabeled" - adata.obs[key_added] = "Unlabeled" - adata.obs["scNym_dataset"] = "target" - - # check that at least some genes overlap between the atlas - # and the target data - FEW_GENES = 100 - n_overlapping_genes = len(np.intersect1d(adata.var_names, atlas.var_names)) - if n_overlapping_genes == 0: - msg = "No genes overlap between the target data `adata` and the atlas.\n" - msg += 'Genes in the atlas are named using Ensembl gene symbols (e.g. `"Gapdh"`).\n' - msg += "Ensure `adata.var_names` also uses gene symbols." - raise RuntimeError(msg) - elif n_overlapping_genes < FEW_GENES: - msg = f"Only {n_overlapping_genes} overlapping genes were found between the target and atlas.\n" - msg += "Ensure your target dataset `adata.var_names` are Ensembl gene names.\n" - msg += "Continuing with transer, but performance is likely to be poor." - warnings.warn(msg) - else: - msg = f"{n_overlapping_genes} overlapping genes found between the target and atlas data." - logger.info(msg) - - # join the target and atlas data - joint_adata = atlas.concatenate( - adata, - join="inner", - ) - - return joint_adata - - -def list_configs(): - for k in CONFIGS.keys(): - print(f"name: {k}") - print("\t" + CONFIGS[k]["description"]) - return - - -def _get_keys_and_list(d: dict) -> Tuple[List[list], List[list]]: - """Get a set of keys mapping to a list in a - nested dictionary structure and the list value. - - Parameters - ---------- - d : dict - a nested dictionary structure where all terminal - values are lists. - - Returns - ------- - keys : List[list] - sequential keys required to access a set of - associated terminal values. - mapped by index to `values`. - values : List[list] - lists of terminal values, each accessed by the - set of `keys` with a matching index from `d`. 
- """ - accession_keys = [] - associated_values = [] - for k in d.keys(): - if type(d[k]) == dict: - # the value is nested, recurse - keys, values = _get_keys_and_list(d[k]) - keys = [ - [ - k, - ] - + x - for x in keys - ] - else: - keys = [ - [k], - ] - values = [d[k]] - - for i in range(len(values)): - accession_keys.append(keys[i]) - associated_values.append(values[i]) - - return accession_keys, associated_values - - -def _updated_nested(d: dict, keys: list, value: list) -> dict: - """Updated the values in a dictionary with multiple nested levels. - - Parameters - ---------- - d : dict - multilevel dictionary. - keys : list - sequential keys specifying a value to update - value : list - new value to use in the update. - - Returns - ------- - d : dict - updated dictionary. - """ - if type(d.get(keys[0], None)) == dict: - # multilevel, recurse - _updated_nested(d[keys[0]], keys[1:], value) - else: - d[keys[0]] = value - return - - -def split_data( - adata: AnnData, - groupby: str, - n_splits: int, -) -> None: - """Split data using a stratified k-fold. - - Parameters - ---------- - adata : anndata.AnnData - [Cells, Genes] experiment. - groupby : str - annotation column in `.obs`. - used for stratification. - n_splits : int - number of train/test/val splits to perform for tuning. - performs at least 5-fold splitting and uses a subset of - the folds if `n_splits < 5`. - - Returns - ------- - None. Adds `f"scNym_split_{n}"` to `adata.obs` for all `n` - in `[0, n_splits)`. - """ - # generate cross val splits - cv = StratifiedKFold( - n_splits=max(5, n_splits), - shuffle=True, - ) - split_indices = list(cv.split(adata.X, adata.obs[groupby])) - - for split_number, train_test in enumerate(split_indices): - - train_idx = train_test[0] - testval_idx = train_test[1] - - test_idx = np.random.choice( - testval_idx, - size=int(np.ceil(len(testval_idx) / 2)), - replace=False, - ) - val_idx = np.setdiff1d( - testval_idx, - test_idx, - ) - - # these tokens are recognized by `api.scnym_train` - adata.obs[f"scNym_split_{split_number}"] = "ERROR" - adata.obs.loc[ - adata.obs_names[train_idx], f"scNym_split_{split_number}" - ] = "train" - adata.obs.loc[adata.obs_names[test_idx], f"scNym_split_{split_number}"] = "test" - adata.obs.loc[adata.obs_names[val_idx], f"scNym_split_{split_number}"] = "val" - - return - - -def _circular_train( - search_config: dict, - params: tuple, - adata: AnnData, - groupby: str, - out_path: str, - accession_keys: List[list], - hold_out_only: bool, - groupby_eval: str, -) -> pd.DataFrame: - """ - Perform a circular training loop for a parameter set. - - Parameters - ---------- - search_config : tuple - configuration for parameter search. - params : tuple - search parameter values - adata : anndata.AnnData - [Cells, Genes] experiment for optimization. - groupby : str - annotation column in `.obs`. - accession_keys : List[list] - sequential keys required to access a set of - associated terminal values. - mapped by index to `values`. - hold_out_only : bool - evaluate the circular accuracy only on a held-out set of - training data, not used in the training of the first - source -> target model. - - Returns - ------- - search_df : pd.DataFrame - [1, (params,) + (acc,)] - search_config : dict - adjusted configuration file for this parameter search. 
- """ - search_number = search_config["search_number"] - split_number = search_config["split_number"] - # fit the source2target - s2t_out_path = osp.join( - out_path, f"search_{search_number:04}_split_{split_number:04}_source2target" - ) - adata = adata.copy() - - logger.info("\n>>>\nTraining source2target model\n>>>\n") - scnym_api( - adata=adata, - groupby=groupby, - task="train", - out_path=s2t_out_path, - config=search_config, - ) - - # load the hold out test acc - with open(osp.join(s2t_out_path, "scnym_train_results.pkl"), "rb") as f: - s2t_res = pickle.load(f) - s2t_source_test_acc = s2t_res["final_acc"] - - logger.info("\n>>>\nPredicting with source2target model\n>>>\n") - # predict on the target set - scnym_api( - adata=adata, - task="predict", - trained_model=s2t_out_path, - config=search_config, - ) - - # invert the problem -- train on the new labels - circ_adata = adata.copy() - circ_adata.obs[groupby] = adata.obs["scNym"] - circ_adata.obs.drop(columns=["scNym"], inplace=True) - # set the training data as unlabeled, leaving labels only on the target data - circ_adata.obs.loc[adata.obs[groupby] != UNLABELED_TOKEN, groupby] = UNLABELED_TOKEN - - # fit a new model - t2s_out_path = osp.join( - out_path, f"search_{search_number:04}_split_{split_number:04}_target2source" - ) - - logger.info("\n>>>\nTraining target2source model\n>>>\n") - - scnym_api( - adata=circ_adata, - groupby=groupby, - task="train", - out_path=t2s_out_path, - config=search_config, - ) - - # predict with new model - logger.info("\n>>>\nPredicting with target2source model\n>>>\n") - scnym_api( - adata=circ_adata, - task="predict", - trained_model=t2s_out_path, - config=search_config, - ) - - # evaluate the model - samples_bidx = adata.obs[groupby] != "Unlabeled" - samples_bidx = ( - samples_bidx & (adata.obs["scNym_split"] == "val") - if hold_out_only - else samples_bidx - ) - y_true = np.array(adata.obs[groupby])[samples_bidx] - y_pred = np.array(circ_adata.obs["scNym"])[samples_bidx] - - n_correct = np.sum(y_true == y_pred) - n_total = len(y_true) - acc = n_correct / n_total - - accession_keys_str = ["::".join(x) for x in accession_keys] - search_df = pd.DataFrame( - columns=accession_keys_str + ["acc"], - index=[search_number], - ) - search_df.loc[search_number] = params + (acc,) - search_df["test_source_acc"] = s2t_source_test_acc - - if groupby_eval is not None: - # compute the test accuracy in the target domain - # here, we use the predictions made by the source2target - # model stored in `adata.obs["scNym"]`. - samples_bidx = adata.obs[groupby] == "Unlabeled" - y_true = np.array(adata.obs[groupby_eval])[samples_bidx] - y_pred = np.array(adata.obs["scNym"])[samples_bidx] - n_correct = np.sum(y_true == y_pred) - test_acc = n_correct / len(y_true) - search_df["test_target_acc"] = "None" - search_df.loc[search_number, "test_target_acc"] = test_acc - - search_df.to_csv(osp.join(t2s_out_path, "result.csv")) - - return search_df - - -def scnym_tune( - adata: AnnData, - groupby: str, - parameters: dict, - search: str = "grid", - base_config: str = "no_new_identity", - n_points: int = 100, - out_path: str = "./scnym_tune", - hold_out_only: bool = True, - groupby_eval: str = None, - n_splits: int = 1, -) -> Tuple[pd.DataFrame, dict]: - """Perform hyperparameter tuning of an scNym model using - circular cross-validation. - - Parameters - ---------- - adata : anndata.AnnData - [Cells, Genes] experiment for optimization. - groupby : str - annotation column in `.obs`. 
- parameters : dict - key:List[value] pairs of parameters to use for - hyperparameter tuning. - base_config : str - one of {"no_new_identity", "new_identity_discovery"}. - base configuration for model training that described - default parameters, not explicitly provided in - `parameters`. - search : str - {"grid", "random"} perform either a random or grid - search over `parameters`. - n_points : int - number of random points to search if `search == "random"`. - out_path : str - path for intermediary files during hyperparameter tuning. - hold_out_only : bool - evaluate the circular accuracy only on a held-out set of - training data, not used in the training of the first - source -> target model. - groupby_eval : str - column in `adata.obs` containing ground truth labels - for the "Unlabeled" dataset to use for evaluation. - n_splits : int - number of train/test/val splits to perform for tuning. - performs at least 5-fold splitting and uses a subset of - the folds if `n_splits < 5`. - - Returns - ------- - tuning_results : pd.DataFrame - [n_points, (parameters,) + (circ_acc, circ_loss)] - best_parameter_set : dict - a configuration describing the best parameter set tested. - - Examples - -------- - >>> # `adata` contains labels in `.obs["annotations"]` where - ... # the target dataset is labeled "Unlabeled" - >>> tuning_results, best_parameters = scnym_tune( - ... adata=adata, - ... groupby="annotations", - ... parameters={ - ... "weight_decay": [1e-6, 1e-5, 1e-4], - ... "unsup_max_weight": [0.1, 1., 10.], - ... }, - ... base_config="no_new_identity", - ... search="grid", - ... out_path="./scnym_tuning", - ... n_splits=5, - ... ) - - Notes - ----- - Circular/Reverse cross-validation evaluates the impact of hyperparameter - selection in semi-supervised learning settings using the training data, - training labels, and target data, but not the target labels. - - This is achieved by training a model :math:`f` on the training set, then - predicting "pseudolabels" for the target set. - A second model :math:`g` is then trained on the target data and - the associated pseudolabels. - The model :math:`g` is used to predict labels for the *training* set. - The accuracy of this "reverse" prediction is then used as an estimate - of the effectiveness of a hyperparameter set. - """ - os.makedirs(out_path, exist_ok=True) - - # get the base configuration dict - # configurations have one layer of nested dictionaries within - config = CONFIGS.get(base_config, None) - if config is None: - msg = f"{base_config} is not a valid base configuration." - raise ValueError(msg) - - ################################################# - # get all possible combinations of parameters - ################################################# - # `_get_keys_and_list` traverses a nested dictionary and - # returns a List[list] of sequential keys to access each - # item in `parameter_ranges`. - # items in `parameter_ranges: List[list]` are lists of - # values for the parameter specified in `accession_keys`. - accession_keys, parameter_ranges = _get_keys_and_list(parameters) - # find all possible combinations of parameters - # each item in `param_sets` is a tuple of parameter values - # each element in the tuple matches the keys in `keys` with - # the same index. 
- param_sets = list( - itertools.product( - *parameter_ranges, - ) - ) - - ################################################# - # select a set of parameters to search - ################################################# - if search.lower() == "random": - # perform a random search by subsetting grid points - param_idx = np.random.choice( - len(param_sets), - size=n_points, - replace=False, - ) - else: - param_idx = range(len(param_sets)) - - ################################################# - # set a common train/test/val split for all params - ################################################# - - splits_provided = "scNym_split_0" in adata.obs.columns - splits_provided = splits_provided or "scNym_split" in adata.obs.columns - - if not splits_provided: - split_data( - adata, - groupby=groupby, - n_splits=n_splits, - ) - elif n_splits == 1 and "scNym_split" in adata.obs.columns: - adata.obs["scNym_split_0"] = adata.obs["scNym_split"] - elif n_splits > 1 and splits_provided: - # check that we have the relevant split for each fold - splits_correct = True - for s in range(n_splits): - splits_correct = splits_correct & (f"scNym_split_{s}" in adata.obs.columns) - if not splits_correct: - msg = '"scNym_split_" was provided with `n_splits>1.\n' - msg += 'f"scNym_split_{n}"" must be present in `adata.obs` for all {n} in `range(n_splits)`\n' - raise ValueError(msg) - else: - msg = "invalid argument for n_splits" - raise ValueError(msg) - - ################################################# - # circular training for each parameter set - ################################################# - - accession_keys_str = ["::".join(x) for x in accession_keys] - - search_results = [] - search_config_store = [] - for search_number, idx in enumerate(param_idx): - # get the parameter set - params = param_sets[idx] - # update the base config with search parameters - search_config = copy.deepcopy(config) - for p_i in range(len(params)): - keys2update = accession_keys[p_i] - value2set = params[p_i] - # updates in place - _updated_nested( - search_config, - keys2update, - value2set, - ) - - # disable checkpoints, tensorboard to reduce I/O - search_config["save_freq"] = 10000 - search_config["tensorboard"] = False - # add search number to config - search_config["search_number"] = search_number - - search_config_store.append( - copy.deepcopy(search_config), - ) - logger.info("searching config:") - logger.info(f"{search_config}") - - for split_number in range(n_splits): - # set the relevant split indices - adata.obs["scNym_split"] = adata.obs[f"scNym_split_{split_number}"] - # set the split number - split_config = copy.deepcopy(search_config) - split_config["split_number"] = split_number - search_df = _circular_train( - search_config=split_config, - params=params, - adata=adata, - groupby=groupby, - out_path=out_path, - accession_keys=accession_keys, - hold_out_only=hold_out_only, - groupby_eval=groupby_eval, - ) - # add the split information - search_df["split_number"] = split_number - search_df["search_number"] = search_number - # save results - search_results.append(search_df) - - # concatenate - search_results = pd.concat(search_results, 0) - best_idx = np.argmax(search_results["acc"]) - best_search = int(search_results.iloc[best_idx]["search_number"]) - - best_config = search_config_store[best_search] - print(">>>>>>") - print("Best config") - print(best_config) - print(">>>>>>") - print() - return search_results, best_config - - -def scnym_interpret( - adata: AnnData, - groupby: str, - source: str, - target: str, - 
trained_model: str, - **kwargs, -) -> dict: - """ - Extract salient features motivating scNym model predictions by estimating - expected gradients. - - Parameters - ---------- - adata - Annotated data matrix used for training or prediction. - If `"scNym_split"` in `.obs_keys()`, uses the cells annotated - `"train", "val"` to select data splits. - groupby - Column in `adata.obs` that contains cell identity annotations. - Values of `"Unlabeled"` indicate that a given cell should be used - only as unlabeled data during training. - source : str - class name for source class in `adata.obs[groupby]`. - target : str - class name for target class in `adata.obs[groupby]`. - trained_model - Path to the output directory of an scNym training run - or a string specifying a pretrained model. - If provided while `task == "train"`, used as an initialization. - kwargs : dict - keyword arguments passed to `scnym.interpret.ExpectedGradients.query(...)`. - - Returns - ------- - expgrad : dict - "gradients" - [Cells, Features] pd.DataFrame of expected gradients for - the target class. - "saliency" - [Features,] pd.Series of mean expected gradients across query - cells, sorted by saliency positive -> negative. - - See Also - -------- - scnym.interpret.ExpectedGradients - """ - # check if a pretrained model was requested - if "pretrained_" in trained_model: - msg = "Pretrained Request Error\n" - msg += "Pretrained weights are no longer supported in scNym.\n" - raise NotImplementedError(msg) - - # load training parameters - with open( - osp.join(trained_model, "scnym_train_results.pkl"), - "rb", - ) as f: - results = pickle.load(f) - - # setup a model object for interpretation - clf = model.CellTypeCLF( - n_genes=results["n_genes"], - n_cell_types=results["n_cell_types"], - **results["model_kwargs"], - ) - - model_weights_path = osp.join( - trained_model, - "00_best_model_weights.pkl", - ) - clf.load_state_dict( - torch.load( - model_weights_path, - map_location="cpu", - ) - ) - if torch.cuda.is_available(): - clf = clf.cuda() - logger.info("Model moved to CUDA compute device.") - - # setup expected gradients - EG = interpret.ExpectedGradient( - model=clf, - gene_names=np.array(results["gene_names"]), - class_names=np.array(results["class_names"]), - ) - - # perform expected gradient estimation - saliency = EG.query( - adata=adata, - source=source, - target=target, - cell_type_col=groupby, - **kwargs, - ) - gradients = EG.gradients - - r = { - "saliency": saliency, - "gradients": gradients, - } - return r diff --git a/build/lib/scnym/attributionpriors.py b/build/lib/scnym/attributionpriors.py deleted file mode 100644 index 978ec93..0000000 --- a/build/lib/scnym/attributionpriors.py +++ /dev/null @@ -1,605 +0,0 @@ -#!/usr/bin/env python -# adopted from https://github.com/suinleelab/attributionpriors -import functools -import operator -from typing import Callable, Union -import numpy as np -import torch -from torch.autograd import grad -from torch.utils.data import DataLoader -import logging - -logger = logging.getLogger(__name__) - -DEFAULT_DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") - - -def gather_nd(params, indices): - """ - Args: - params: Tensor to index - indices: k-dimension tensor of integers. 
- Returns: - output: 1-dimensional tensor of elements of ``params``, where - output[i] = params[i][indices[i]] - - params indices output - - 1 2 1 1 4 - 3 4 2 0 ----> 5 - 5 6 0 0 1 - """ - max_value = functools.reduce(operator.mul, list(params.size())) - 1 - indices = indices.t().long() - ndim = indices.size(0) - idx = torch.zeros_like(indices[0]).long() - m = 1 - - for i in range(ndim)[::-1]: - idx += indices[i] * m - m *= params.size(i) - - idx[idx < 0] = 0 - idx[idx > max_value] = 0 - return torch.take(params, idx) - - -def adj2lap( - adj: torch.FloatTensor, -) -> torch.FloatTensor: - """Convert an adjacency matrix to a graph Laplacian - - Notes - ----- - Graph Laplacian is - - .. math:: - - L = D - A - - where :math:`D` is a diagonal matrix with the degree of - each node and :math:`A` is the graph adjacency matrix. - """ - adj = (adj > 0).float() - row_sum = torch.sum(adj, dim=1) - # constructs [n_vertices, n_vertices] with row_sum on diagonal - D = torch.diag(row_sum) - return D - adj - - -def tgini(x): - mad = torch.mean(torch.abs(x.reshape(-1, 1) - x.reshape(1, -1))) - rmad = mad / torch.mean(x) - g = 0.5 * rmad - return g - - -def gini_eg(shaps: torch.FloatTensor) -> torch.FloatTensor: - """Gini coefficient sparsity prior - - Parameters - ---------- - shaps : torch.FloatTensor - [Observations, Features] estimated Shapley values. - - Returns - ------- - gini_prior : torch.FloatTensor - inverse Gini coefficient prior penalty. - """ - abs_attrib = shaps.abs() - return -tgini(abs_attrib.mean(0)) - - -def gini_classwise_eg( - shaps: torch.FloatTensor, - target: torch.LongTensor, -) -> torch.FloatTensor: - """Compute Gini coefficient sparsity prior within individual - classes. This allows each class to have a unique set of sparsely - activated features, rather than globally requiring all classes - to use the same small feature set. - - Parameters - ---------- - shaps : torch.FloatTensor - [Observations, Features] estimated Shapley values. - target : torch.LongTenspr - [Observations,] int class labels. - - Returns - ------- - gini_prior : torch.FloatTensor - inverse Gini coefficient prior penalty. - """ - classes = torch.unique(target) - ginis = torch.zeros((len(classes),)).to(device=shaps.device) - n_obs = torch.zeros((len(classes),)).to(device=shaps.device) - for i, c in enumerate(classes): - c_shaps = shaps[target == c] - c_gini = gini_eg(c_shaps) - ginis[i] = c_gini - n_obs[i] = c_shaps.size(0) - # compute weighted gini coefficient - p_obs = n_obs / torch.sum(n_obs) - weighted_gini = torch.sum(p_obs * ginis) - return weighted_gini - - -def graph_eg( - shaps: torch.FloatTensor, - graph: torch.FloatTensor, -) -> torch.FloatTensor: - """Graph attribution prior - - Parameters - ---------- - shaps : torch.FloatTensor - [Observations, Features] estimated Shapley values. - graph : torch.FloatTensor - [Features, Features] adjacency matrix (weighted or binary). - - Returns - ------- - graph_prior : torch.FloatTensor - graph prior penalty. 
- """ - # get mean gradient for each feature - feature_grad = torch.mean(shaps, dim=0) - # get a matrix of differences between feature grads - cols = feature_grad.view(1, -1).repeat(feature_grad.size(0), 1) - rows = feature_grad.view(-1, 1).repeat(1, feature_grad.size(0)) - # delta[i, j] is grad_i - grad_j - delta = rows - cols - # "Gaussian" penalty is just square of delta - penalty = torch.pow(delta, 2) - weighted_penalty = penalty * graph - return weighted_penalty - - -def check(key, sets: dict, reference: set) -> list: - return [x in reference for x in sets[key]] - - -class AttributionPriorExplainer(object): - def __init__( - self, - background_dataset: torch.utils.data.Dataset, - batch_size: int, - random_alpha: bool = True, - k: int = 1, - scale_by_inputs: bool = True, - abs_scale: bool = True, - input_batch_index: Union[str, int, tuple] = None, - ) -> None: - """Estimates feature gradients using expected gradients. - - Parameters - ---------- - background_dataset : torch.utils.data.Dataset - dataset of samples to use as background references. - most commonly, this is the whole training set. - batch_size : int - batch size used for training. must be the same as the - batch size for the training dataloader. - random_alpha : bool - use randomized `alpha ~ Unif(0, 1)` values for computing - an intermediary sample between the reference and target - sample at each minibatch. - k : int - number of references to use per training example per minibatch. - `k=1` works well as a default with minimal computational - overhead. - scale_by_inputs : bool - scale expected gradient values using a dot-product with the - difference `(input-reference)` feature values. - abs_scale : bool - only considered if `scale_by_inputs=True`. Rather than scaling - by the raw difference, scale by the absolute value of the - difference. - input_batch_index : Union[str,int,tuple], optional - key for extracting the input values from a batch drawn from - `background_dataset`. e.g. if batches are stored in `dict`, - this is the key for the input tensor. if batches are `tuple`, - this is the index of the input tensor. - - Returns - ------- - None. - - References - ---------- - https://github.com/suinleelab/attributionpriors - """ - self.random_alpha = random_alpha - self.k = k - self.scale_by_inputs = scale_by_inputs - self.abs_scale = abs_scale - self.batch_size = batch_size - self.ref_set = background_dataset - self.ref_sampler = DataLoader( - dataset=background_dataset, - batch_size=batch_size * k, - shuffle=True, - drop_last=True, - ) - self.input_batch_index = input_batch_index - return - - def _get_ref_batch( - self, - k=None, - ): - """Get a batch from the reference dataset""" - b = next(iter(self.ref_sampler)) - if self.input_batch_index is not None: - # extract the input tensor using a provided index - b = b[self.input_batch_index].float() - b = b.to(device=self.DEFAULT_DEVICE) - if self.batch_transformation is not None: - # transform the reference batch with a specified transformation - b = self.batch_transformation(b) - return b - - def _get_samples_input( - self, - input_tensor: torch.FloatTensor, - reference_tensor: torch.FloatTensor, - ) -> torch.FloatTensor: - """ - Calculate interpolation points between input samples and reference - samples. - - Parameters - ---------- - input_tensor : torch.FloatTensor - shape (batch, ...), where ... indicates the input dimensions. - reference_tensor : torch.FloatTensor - shape (batch, k, ...) 
where k represents the number of - background reference samples to draw per input in the batch. - - Returns - ------- - samples_input : torch.FloatTensor - shape (batch, k, ...) with the interpolated points between - input and ref. - - Notes - ----- - For integrated gradients, we compute some `M=100+` samples interpolating - between each input and a relevant reference sample. For expected - gradients, we rather compute interpolation points that lie randomly - along the linear path between the sample and reference in each minibatch. - """ - input_dims = list(input_tensor.size())[1:] - num_input_dims = len(input_dims) - - batch_size = reference_tensor.size()[0] - k_ = reference_tensor.size()[1] - - # Grab a [batch_size, k]-sized interpolation sample - if self.random_alpha: - t_tensor = ( - torch.FloatTensor(batch_size, k_).uniform_(0, 1).to(self.DEFAULT_DEVICE) - ) - else: - if k_ == 1: - t_tensor = torch.cat( - [torch.Tensor([1.0]) for i in range(batch_size)] - ).to(self.DEFAULT_DEVICE) - else: - t_tensor = torch.cat( - [torch.linspace(0, 1, k_) for i in range(batch_size)] - ).to(self.DEFAULT_DEVICE) - - shape = [batch_size, k_] + [1] * num_input_dims - interp_coef = t_tensor.view(*shape) - - # Evaluate the end points - end_point_ref = (1.0 - interp_coef) * reference_tensor - - input_expand_mult = input_tensor.unsqueeze(1) - end_point_input = interp_coef * input_expand_mult - - # A fine Affine Combine - samples_input = end_point_input + end_point_ref - return samples_input - - def _get_samples_delta( - self, - input_tensor: torch.FloatTensor, - reference_tensor: torch.FloatTensor, - ) -> torch.FloatTensor: - """Compute the distance in feature space between input samples - and reference samples. - - Parameters - ---------- - input_tensor : torch.FloatTensor - shape (batch, ...), where ... indicates the input dimensions. - reference_tensor : torch.FloatTensor - shape (batch, k, ...) where k represents the number of - background reference samples to draw per input in the batch. - - Returns - ------- - sd : torch.FloatTensor - (batch, k, ...) differences in each feature between input - samples and the assigned reference. - """ - input_expand_mult = input_tensor.unsqueeze(1) - sd = input_expand_mult - reference_tensor - if self.abs_scale: - sd = torch.abs(sd) - return sd - - def _get_grads( - self, - samples_input: torch.FloatTensor, - model: torch.nn.Module, - sparse_labels: torch.LongTensor = None, - ) -> torch.FloatTensor: - """Compute gradients for a given model and input tensor, - taking into account sparse labels if provided. - - Parameters - ---------- - samples_input : torch.FloatTensor - (batch, k, ...) input features. - during training, these are interpolated samples between input - and reference. - during evaluation, these are raw input samples. - model : torch.nn.Module - model for evaluation. - sparse_labels : torch.LongTensor, optional - (batch, classes) one-hot labels for class assignments. - must be provided if `classes > 1`. - - Returns - ------- - grad_tensor : torch.FloatTensor - (batch, ...) 
gradient values - """ - samples_input.requires_grad = True - - grad_tensor = torch.zeros(samples_input.shape).float().to(self.DEFAULT_DEVICE) - - for i in range(self.k): - particular_slice = samples_input[:, i] - batch_output = model(particular_slice) - # should check that users pass in sparse labels - # Only look at the user-specified label - # if there is only one class, `batch_output` is already `(batch, 1)` - if batch_output.size(1) > 1: - if sparse_labels is None: - msg = "`sparse_labels` must be provided if more than one\n" - msg += "output class is present." - raise TypeError(msg) - - sample_indices = torch.arange(0, batch_output.size(0)).to( - self.DEFAULT_DEVICE - ) - indices_tensor = torch.cat( - [ - sample_indices.unsqueeze(1), - sparse_labels.unsqueeze(1), - ], - dim=1, - ) - # gathers the relevant class output for each sample to create - # batch_output shape : (batch, 1). - batch_output = gather_nd(batch_output, indices_tensor) - - model_grads = grad( - outputs=batch_output, - inputs=particular_slice, - grad_outputs=torch.ones_like(batch_output).to(self.DEFAULT_DEVICE), - create_graph=True, - ) - grad_tensor[:, i, :] = model_grads[0] - return grad_tensor - - def shap_values( - self, - model: torch.nn.Module, - input_tensor: torch.FloatTensor, - sparse_labels: torch.LongTensor = None, - batch_transformation: Callable = None, - ) -> torch.FloatTensor: - """ - Calculate expected gradients approximation of Shapley values for the - sample ``input_tensor``. - - Parameters - ---------- - model : torch.nn.Module - Pytorch model for which the output should be explained. - input_tensor : torch.Tensor - (batch, ...) tensor representing the input to be explained, - where `...` are feature dimensions. - sparse_labels : torch.LongTensor, optional - (batch, classes) one-hot class labels. - not required if only one output class is present. - batch_transformation : Callable, optional. - transformation to apply to reference batches after drawing. - - Returns - ------- - expected_grads : torch.FloatTensor - (batch, ...) expected gradients for each sample in the input. - """ - # set device to use - self.DEFAULT_DEVICE = list(model.parameters())[0].device - # set a batch transformation if applicable - self.batch_transformation = batch_transformation - if batch_transformation is not None and not callable(batch_transformation): - msg = "`batch_transformation` arguments must be callable." - raise TypeError(msg) - # sample a batch from the reference dataset and reshape - # to match the inputs - reference_tensor = self._get_ref_batch() - shape = reference_tensor.shape - reference_tensor = reference_tensor.view( - self.batch_size, self.k, *(shape[1:]) - ).to(self.DEFAULT_DEVICE) - # get interpolation points between provided inputs and the - # assigned reference sample for each sample in the batch - samples_input = self._get_samples_input(input_tensor, reference_tensor) - # compute the difference across each feature between - # input and reference samples - samples_delta = self._get_samples_delta(input_tensor, reference_tensor) - # compute gradients on label scores w.r.t. the interpolation inputs - grad_tensor = self._get_grads(samples_input, model, sparse_labels) - # scale the gradient tensor by the difference - mult_grads = ( - samples_delta * grad_tensor if self.scale_by_inputs else grad_tensor - ) - expected_grads = mult_grads.mean(1) - return expected_grads - - -class VariableBatchExplainer(AttributionPriorExplainer): - """ - Subclasses AttributionPriorExplainer to avoid pre-specified batch size. 
Will adapt batch - size based on shape of input tensor. - """ - - def __init__(self, background_dataset, random_alpha=True, scale_by_inputs=True): - """ - Arguments: - background_dataset: PyTorch dataset - may not work with iterable-type (vs map-type) datasets - random_alpha: boolean - Whether references should be interpolated randomly (True, corresponds - to Expected Gradients) or on a uniform grid (False - corresponds to Integrated Gradients) - """ - self.random_alpha = random_alpha - self.k = None - self.scale_by_inputs = scale_by_inputs - self.ref_set = background_dataset - self.ref_sampler = DataLoader( - dataset=background_dataset, batch_size=1, shuffle=True, drop_last=True - ) - self.refs_needed = -1 - return - - def _get_ref_batch(self, refs_needed=None): - """ - Arguments: - refs_needed: int - number of references to provide - """ - if refs_needed != self.refs_needed: - self.ref_sampler = DataLoader( - dataset=self.ref_set, - batch_size=refs_needed, - shuffle=True, - drop_last=True, - ) - self.refs_needed = refs_needed - return next(iter(self.ref_sampler))[0].float() - - def shap_values(self, model, input_tensor, sparse_labels=None, k=1): - """ - Arguments: - base_model: PyTorch network - input_tensor: PyTorch tensor to get attributions for, as in normal torch.nn.Module API - sparse_labels: np.array of sparse integer labels, i.e. 0-9 for MNIST. Used if you only - want to explain the prediction for the true class per sample. - k: int - Number of references to use default for explanations. As low as 1 for training. - 100-200 for reliable explanations. - """ - self.k = k - n_input = input_tensor.shape[0] - refs_needed = n_input * self.k - # This is a reasonable check but prevents compatibility with non-Map datasets - assert refs_needed <= len( - self.ref_set - ), "Can't have more samples*references than there are reference points!" - reference_tensor = self._get_ref_batch(refs_needed) - shape = reference_tensor.shape - reference_tensor = reference_tensor.view(n_input, self.k, *(shape[1:])).to( - DEFAULT_DEVICE - ) - samples_input = self._get_samples_input(input_tensor, reference_tensor) - samples_delta = self._get_samples_delta(input_tensor, reference_tensor) - grad_tensor = self._get_grads(samples_input, model, sparse_labels) - mult_grads = ( - samples_delta * grad_tensor if self.scale_by_inputs else grad_tensor - ) - expected_grads = mult_grads.mean(1) - - return expected_grads - - -class ExpectedGradientsModel(torch.nn.Module): - """ - Wraps a PyTorch model (one that implements torch.nn.Module) so that model(x) - produces SHAP values as well as predictions (controllable by 'shap_values' - flag. - """ - - def __init__( - self, base_model, refset, k=1, random_alpha=True, scale_by_inputs=True - ): - """ - Arguments: - base_model: PyTorch network that subclasses torch.nn.Module - refset: PyTorch dataset - may not work with iterable-type (vs map-type) datasets - k: int - Number of references to use by default for explanations. As low as 1 for training. - 100-200 for reliable explanations. 
- """ - super(ExpectedGradientsModel, self).__init__() - self.k = k - self.base = base_model - self.refset = refset - self.random_alpha = random_alpha - self.exp = VariableBatchExplainer( - self.refset, - random_alpha=random_alpha, - scale_by_inputs=scale_by_inputs, - ) - - def forward(self, x, shap_values=False, sparse_labels=None, k=1): - """ - Arguments: - x: PyTorch tensor to predict with, as in normal torch.nn.Module API - shap_values: Binary flag -- whether to produce SHAP values - sparse_labels: np.array of sparse integer labels, i.e. 0-9 for MNIST. Used if you only - want to explain the prediction for the true class per sample. - k: int - Number of references to use default for explanations. As low as 1 for training. - 100-200 for reliable explanations. - """ - output = self.base(x) - if not shap_values: - return output - else: - shaps = self.exp.shap_values(self.base, x, sparse_labels=sparse_labels, k=k) - return output, shaps - - -def tmp(): - """ - def convert_csr_to_sparse_tensor_inputs(X): - coo = sp.coo_matrix(X) - indices = np.mat([coo.row, coo.col]).transpose() - return indices, coo.data, coo.shape - - def graph_mult(values, indices, shape, y): - # sparse tensor multiplication function - x_tensor = tf.SparseTensor(indices, values, shape) - out_layer = tf.sparse_tensor_dense_matmul(x_tensor, y) - return out_layer - - def adj_to_lap(x): - # calculate graph laplacian from adjacency matrix - rowsum = np.array(x.sum(1)) - D = sp.diags(rowsum) - return D - x - - adj = adj_to_lap(adj) - adj_indices, adj_values, adj_shape = convert_csr_to_sparse_tensor_inputs(adj) - - # ... during training ... - ma_eg = tf.reduce_mean(tf.abs(expected_gradients_op),axis=0) - graph_reg = tf.matmul(tf.transpose(graph_mult(adj_values, adj_indices, adj_shape, ma_eg[145:,:])),ma_eg[145:,:]) - """ - # pass - return diff --git a/build/lib/scnym/dataprep.py b/build/lib/scnym/dataprep.py deleted file mode 100644 index 1dbf1f0..0000000 --- a/build/lib/scnym/dataprep.py +++ /dev/null @@ -1,765 +0,0 @@ -import torch -import numpy as np -from scipy import sparse -from torch.utils.data import Dataset -from typing import Callable, Any, Union -import logging - - -logger = logging.getLogger(__name__) - - -class SingleCellDS(Dataset): - """Dataset class for loading single cell profiles. - - Attributes - ---------- - X : np.ndarray, sparse.csr_matrix - [Cells, Genes] cell profiles. - y_labels : np.ndarray, sparse.csr_matrix - [Cells,] integer class labels. - y : torch.FloatTensor - [Cells, Classes] one hot labels. - transform : Callable - performs data transformation operations on a - `sample` dict. - num_classes : int - number of classes in the dataset. default `-1` infers - the number of classes as `len(unique(y))`. - """ - - def __init__( - self, - X: Union[sparse.csr.csr_matrix, np.ndarray], - y: Union[sparse.csr.csr_matrix, np.ndarray], - domain: Union[sparse.csr.csr_matrix, np.ndarray] = None, - transform: Callable = None, - num_classes: int = -1, - num_domains: int = -1, - ) -> None: - """ - Load single cell expression profiles. - - Parameters - ---------- - X : np.ndarray, sparse.csr_matrix - [Cells, Genes] expression count matrix. - scNym models expect ln(Counts Per Million + 1). - Pathfinder models expect raw counts. - y : np.ndarray, sparse.csr_matrix - [Cells,] integer cell type labels. - domain : np.ndarray, sparse.csr_matrix - [Cells,] integer domain labels. - transform : Callable - transform to apply to samples. - num_classes : int - total number of classes for the task. 
- num_domains : int - total number of domains for the task. - - Returns - ------- - None. - """ - super(SingleCellDS, self).__init__() - - # check types on input arrays - if type(X) not in ( - np.ndarray, - sparse.csr_matrix, - ): - msg = f"X is type {type(X)}, must `np.ndarray` or `sparse.csr_matrix`" - raise TypeError(msg) - - if type(y) not in ( - np.ndarray, - sparse.csr_matrix, - ): - msg = f"X is type {type(y)}, must `np.ndarray` or `sparse.csr_matrix`" - raise TypeError(msg) - - if type(y) != np.ndarray: - # densify labels - y = y.toarray() - - self.X = X - self.y_labels = torch.from_numpy(y).long() - self.y = torch.nn.functional.one_hot( - self.y_labels, - num_classes=num_classes, - ).float() - - self.dom_labels = domain - if self.dom_labels is not None: - self.dom = torch.nn.functional.one_hot( - torch.from_numpy(self.dom_labels).long(), - num_classes=num_domains, - ).float() - else: - self.dom = np.zeros_like(self.y) - 1 - - self.transform = transform - - if not self.X.shape[0] == self.y.shape[0]: - sizes = (self.X.shape[0], self.y.shape[0]) - raise ValueError("X rows %d not equal to y rows %d." % sizes) - return - - def __len__( - self, - ) -> int: - """Return the number of examples in the data set.""" - return self.X.shape[0] - - def __getitem__( - self, - idx: int, - ) -> dict: - """Get a single cell expression profile and corresponding label. - - Parameters - ---------- - idx : int - index value in `range(len(self))`. - - Returns - ------- - sample : dict - 'input' - torch.FloatTensor, input vector - 'output' - torch.LongTensor, target label - """ - if type(idx) != int: - raise TypeError(f"indices must be int, you passed {type(idx)}, {idx}") - - # check if the idx value is valid given the dataset size - if idx < 0 or idx > len(self): - vals = (idx, len(self)) - raise ValueError("idx %d is invalid for dataset with %d examples." % vals) - - # retrieve relevant sample vector and associated label - # store in a hash table for later manipulation and retrieval - - # input_ is either an `np.ndarray` or `sparse.csr.csr_matrix` - input_ = self.X[idx, ...] - # label is already a `torch.Tensor` - label = self.y[idx] - - # if the corresponding vectors are sparse, convert them to dense - # we perform this operation on a samplewise-basis to avoid - # storing the whole count matrix in dense format - if type(input_) != np.ndarray: - input_ = input_.toarray() - - input_ = torch.from_numpy(input_).float() - if input_.size(0) == 1: - input_ = input_.squeeze() - - sample = { - "input": input_, - "output": label, - } - - sample["domain"] = self.dom[idx] - - # if a transformer was supplied, apply transformations - # to the sample vector and label - if self.transform is not None: - sample = self.transform(sample) - return sample - - -def balance_classes( - y: np.ndarray, - class_min: int = 256, -) -> np.ndarray: - """ - Perform class balancing by undersampling majority classes - and oversampling minority classes, down to a minimum value. - - Parameters - ---------- - y : np.ndarray - class assignment indices. - class_min : int - minimum number of examples to use for a class. - below this value, minority classes will be oversampled - with replacement. - - Returns - ------- - all_idx : np.ndarray - indices for balanced classes. some indices may be repeated. - """ - # determine the size of the smallest class - # if < `class_min`, we oversample to `class_min` samples. 
- classes, counts = np.unique(y, return_counts=True) - min_count = int(np.min(counts)) - if min_count < class_min: - min_count = class_min - - # generate indices with equal representation of each class - all_idx = [] - for i, c in enumerate(classes): - class_idx = np.where(y == c)[0].astype("int") - rep = counts[i] < min_count # oversample minority classes - if rep: - print("Count for class %s is %d. Oversampling." % (c, counts[i])) - ridx = np.random.choice(class_idx, size=min_count, replace=rep) - all_idx += [ridx] - all_idx = np.concatenate(all_idx).astype("int") - return all_idx - - -class LibrarySizeNormalize(object): - """Perform library size normalization.""" - - def __init__( - self, - counts_per_cell_after: int = int(1e6), - log1p: bool = True, - ) -> None: - self.counts_per_cell_after = counts_per_cell_after - self.log1p = log1p - return - - def __call__( - self, - sample: dict, - ) -> dict: - """Perform library size normalization in-place - on a sample dict. - - Parameters - ---------- - sample : dict - 'input' - torch.FloatTensor, input vector [N, C] - 'output' - torch.LongTensor, target label [N,] - - Returns - ------- - sample : dict - 'input' - torch.FloatTensor, input vector [N, C] - 'output' - torch.LongTensor, target label [N,] - """ - input_ = sample["input"] - size = torch.sum(input_, dim=1).reshape(-1, 1) - - # get proportions of each feature per sample, - # scale by `counts_per_cell_after` - prop_input_ = input_ / size - norm_input_ = prop_input_ * self.counts_per_cell_after - if self.log1p: - norm_input_ = torch.log1p(norm_input_) - sample["input"] = norm_input_ - return sample - - -class ExpMinusOne(object): - def __init__( - self, - ) -> None: - """Perform an exponential minus one transformation - on an input vector""" - return - - def __call__( - self, - sample: dict, - ) -> dict: - """Perform an exponential minus one transformation - on the sample input.""" - sample["input"] = torch.expm1( - sample["input"], - ) - return sample - - -class MultinomialSample(object): - """Sample an mRNA abundance profile from a multinomial - distribution parameterized by observations. - """ - - def __init__( - self, - depth: tuple = (10000, 100000), - depth_ratio: tuple = None, - ) -> None: - """Sample an mRNA abundance profile from a multinomial - distribution parameterized by observations. - - Parameters - ---------- - depth : tuple - (min, max) depth for multinomial sampling. - depth_ratio : tuple - (min, max) ratio of profile depth for multinomial - sampling. supercedes `depth`. - - Returns - ------- - None. - """ - self.depth = depth - self.depth_ratio = depth_ratio - - if self.depth_ratio is not None: - self.depth = None - - return - - def __call__( - self, - sample: dict, - ) -> dict: - """ - Sample an mRNA profile from a multinomial - parameterized by observations. - - Parameters - ---------- - sample : dict - 'input' - torch.FloatTensor, input vector [N, C] - 'output' - torch.LongTensor, target label [N,] - - Returns - ------- - sample : dict - 'input' - torch.FloatTensor, input vector [N, C] - 'output' - torch.LongTensor, target label [N,] - - Notes - ----- - We perform multinomial sampling with a call to `np.random.multinomial` - for each observation. This may be faster in the future using the native - `torch.distributions.Multinomial`, but right now the sampling procedure - is incredibly slow. The implementation below is ~100X slower than our - `numpy` calls. 
- - ``` - multi = torch.distributions.Multinomial( - total_count=d, - probs=p, - ) - - m = multi.sample() - m = m.float() - ``` - - Follow: - https://github.com/pytorch/pytorch/issues/11931 - """ - # input is a torch.FloatTensor - # we assume x is NOT log-transformed - # cast to float64 to preserve precision of proportions - x = sample["input"].to(torch.float64) - size = torch.sum(x, dim=1).detach().cpu().numpy() - - # generate a relative abundance profile - p = x / torch.sum(x, dim=1).reshape(-1, 1) - # normalize to ensure roundoff errors don't - # give us p.sum() > 1 - idx = torch.where(p.sum(1) > 1) - for i in idx[0]: - p[i, :] = p[i, :] / np.min([p[i, :].sum(), 1.0]) - # sample a sequencing depth - if self.depth_ratio is None: - # tile the specified depth for all cells - depth = np.tile(np.array(self.depth).reshape(1, -1), (x.size(0), 1)).astype( - np.int - ) - else: - # compute a range of depths based on the library size - # of each observation - depth = np.concatenate( - [ - np.floor(self.depth_ratio[0] * size).reshape(-1, 1), - np.ceil(self.depth_ratio[1] * size).reshape(-1, 1), - ], - axis=1, - ).astype(np.int) - - # sample from a multinomial - # np.random.multinomial is ~100X faster than the native - # torch.distributions.Multinomial, implemented in Notes - m = np.zeros(x.size()) - for i in range(x.size(0)): - - d = int( - np.random.choice( - np.arange(depth[i, 0], depth[i, 1]), - size=1, - ) - ) - - m[i, :] = np.random.multinomial( - d, - pvals=p[i, :].detach().cpu().numpy(), - ) - m = torch.from_numpy(m).float() - m = m.to(device=x.device) - output = { - "input": m, - "output": sample["output"], - } - return output - - -class GeneMasking(object): - def __init__( - self, - p_drop: float = 0.1, - p_apply: float = 0.5, - sample_p_drop: bool = False, - ) -> None: - """Mask a subset of genes in the gene expression vector - with zeros. This may simulate a failed detection event. - This mask is applied to `p_apply`*100% of input vectors. - - Parameters - ---------- - p_drop : float - proportion of genes to mask with zeros. - p_apply : float - proportion of samples to mask. - sample_p_drop : bool - sample the proportion of genes to drop from - `Unif(0, p_drop)`. - - Returns - ------- - None. - """ - self.p_drop = p_drop - self.p_apply = p_apply - self.sample_p_drop = sample_p_drop - return - - def __call__( - self, - sample: dict, - ) -> dict: - """Mask a subset of genes.""" - do_apply = np.random.random() - if do_apply > self.p_apply: - # no-op - return sample - - # input is a torch.FloatTensor - x = sample["input"].clone() - - if self.sample_p_drop: - p_drop = np.random.random() * self.p_drop - else: - p_drop = self.p_drop - - # mask a proportion `p` of genes with `0` - # assume x [N, Genes] - n_genes = x.size(1) - for i in range(x.size(0)): - idx = np.random.choice( - np.arange(n_genes), - size=int(np.floor(n_genes * p_drop)), - replace=False, - ).astype(np.int) - x[i, idx] = 0 - - sample["input"] = x - return sample - - -class InputDropout(object): - def __init__( - self, - p_drop: float = 0.1, - ) -> None: - """Randomly mask `p_drop` genes. - - Parameters - ---------- - p_drop : float - proportion of genes to mask. 
- - Returns - ------- - None - """ - self.p_drop = p_drop - return - - def __call__( - self, - sample: dict, - ) -> dict: - sample["input"] = torch.nn.functional.dropout( - sample["input"], - p=self.p_drop, - inplace=False, - ) - return sample - - -class PoissonSample(object): - """Sample a gene expression profile based on gene-specific - Poisson distributions""" - - def __init__( - self, - depth: Union[float, tuple] = 1.0, - ) -> None: - """Sample a gene expression profile based on gene-specific - Poisson distributions. - - Parameters - ---------- - depth : tuple, float - (min_factor, max_factor) for scaling the rate of the Poisson - that samples are drawn from. Scaling down produces sparser - profiles, scaling up produces less sparse profiles. - if `float`, uses a single depth value. Default = 1. - - Returns - ------- - None. - - Notes - ----- - Treats a raw gene count as an estimate of the rate for a Poisson - distribution. - """ - self.depth = depth - return - - def __call__( - self, - sample: dict, - ) -> dict: - # input is a torch.FloatTensor - # we assume x is NOT log-transformed - x = sample["input"].to(torch.float64) - - if type(self.depth) != float: - # sample a scale factor for the rate in the specified interval - # Unif(r1, r2) = Unif(0, 1) * (r1 - r2) + r2 - logging.debug("Multiscale Poisson depths") - r = torch.rand(x.size(0)).to(device=x.device) - r = r * (self.depth[0] - self.depth[1]) + self.depth[1] - else: - logging.debug("Single scale Poisson sampling") - r = torch.ones(x.size(0)).to(device=x.device) - r *= self.depth - - logger.debug(f"Poisson rate: {r}") - logger.debug(f"Poisson sample: {x}") - # torch Poisson can't handle rates equal to zero - # here we manually set zero rates to eps, then zero - # them back out later - rate = x * r.view(-1, 1) - rate[x == 0.0] = 1.0 - P = torch.distributions.Poisson( - rate=rate, - ) - x_poisson = P.sample() - x_poisson[x == 0.0] = 0.0 - - assert x.size() == x_poisson.size() - - sample["input"] = x_poisson.float() - return sample - - -"""Implement MixUp training""" - - -def mixup( - a: torch.FloatTensor, - b: torch.FloatTensor, - gamma: torch.FloatTensor, -) -> torch.FloatTensor: - """Perform a MixUp operation. - This is effectively just a weighted average, where - `gamma = 0.5` yields the mean of `a` and `b`. - - Parameters - ---------- - a : torch.FloatTensor - [Batch, C] first sample matrix. - b : torch.FloatTensor - [Batch, C] second sample matrix. - gamma : torch.FloatTensor - [Batch,] MixUp coefficient. - - Returns - ------- - m : torch.FloatTensor - [Batch, C] mixed sample matrix. - """ - return gamma * a + (1 - gamma) * b - - -class SampleMixUp(object): - def __init__( - self, - alpha: float = 0.2, - keep_dominant_obs: bool = False, - ) -> None: - """Perform a MixUp operation on a sample batch. - - Parameters - ---------- - alpha : float - alpha parameter of the Beta distribution. - keep_dominant_obs : bool - use max(gamma, 1-gamma) for each pair of samples - so the identity of the dominant observation can be - associated with the mixed sample. - - Returns - ------- - None. - - References - ---------- - mixup: Beyond Empirical Risk Minimization - Hongyi Zhang, Moustapha Cisse, Yann N. Dauphin, David Lopez-Paz - arXiv:1710.09412 - - Notes - ----- - Zhang et. al. note alpha [0.1, 0.4] improve performance on CIFAR-10, - while larger values of alpha induce underfitting. 
- """ - self.alpha = alpha - if alpha > 0.0: - self.beta = torch.distributions.beta.Beta( - self.alpha, - self.alpha, - ) - self.keep_dominant_obs = keep_dominant_obs - return - - def __call__( - self, - sample: dict, - ) -> dict: - """Perform a MixUp operation on the sample. - - Parameters - ---------- - sample : dict - 'input' - torch.FloatTensor, input vector - 'output' - torch.LongTensor, target label - - Returns - ------- - sample : dict - 'input' - torch.FloatTensor, input vector - 'output' - torch.LongTensor, target label - """ - if self.alpha == 0.0: - # mixup is deactivated, return the original - # sample without mixing - return sample - - input_ = sample["input"] - output = sample["output"] - - # randomly permute the input and output - ridx = torch.randperm(input_.size(0)) - r_input_ = input_[ridx] - r_output = output[ridx] - - # perform the mixup operation between the source - # data and the rearranged data -- random pairs - gamma = self.beta.sample((input_.size(0),)) - if self.keep_dominant_obs: - gamma, _ = torch.max( - torch.stack( - [ - gamma, - 1 - gamma, - ], - dim=1, - ), - dim=1, - ) - gamma = gamma.reshape(-1, 1) - # move gamma weights to the same device as the - # inputs - gamma = gamma.to(device=input_.device) - - mix_input_ = mixup(input_, r_input_, gamma=gamma) - mix_output = mixup(output, r_output, gamma=gamma) - - sample["input"] = mix_input_ - sample["output"] = mix_output - - # if there are additional tensors in sample, also mix - # them up - other_keys = [k for k in sample.keys() if k not in ("input", "output")] - for k in other_keys: - if type(sample[k]) == torch.Tensor: - sample[k] = mixup(sample[k], sample[k][ridx], gamma=gamma) - - # add the randomization index to the sample in case - # it's useful downstream - sample["random_idx"] = ridx - - return sample - - -################################################# -# Define augmentation series -################################################# - -from torchvision import transforms - - -def identity(x: Any) -> Any: - """Identity function""" - return x - - -AUGMENTATION_SCHEMES = { - "log1p_drop": transforms.Compose( - [ - ExpMinusOne(), - InputDropout( - p_drop=0.1, - ), - LibrarySizeNormalize(log1p=True), - ] - ), - "log1p_mask": transforms.Compose( - [ - ExpMinusOne(), - GeneMasking( - p_drop=0.1, - p_apply=0.5, - ), - LibrarySizeNormalize(log1p=True), - ] - ), - "log1p_poisson": transforms.Compose( - [ - ExpMinusOne(), - PoissonSample(), - LibrarySizeNormalize(log1p=True), - ] - ), - "log1p_poisson_drop": transforms.Compose( - [ - ExpMinusOne(), - PoissonSample(depth=(0.1, 2.0)), - InputDropout(p_drop=0.1), - LibrarySizeNormalize(log1p=True), - ] - ), - "count_poisson": transforms.Compose( - [ - PoissonSample(), - ] - ), - "None": identity, - "none": identity, - None: identity, -} diff --git a/build/lib/scnym/distributions.py b/build/lib/scnym/distributions.py deleted file mode 100644 index a0f1aad..0000000 --- a/build/lib/scnym/distributions.py +++ /dev/null @@ -1,420 +0,0 @@ -"""torch Distributions for use with scNym models - -Negative Binomial adopted from scvi-tools -https://github.com/YosefLab/scvi-tools/blob/42315756ba879b9421630696ea7afcd74e012a07/scvi/distributions/_negative_binomial.py -""" -import warnings -from typing import Optional, Tuple, Union - -import torch -import torch.nn.functional as F -from torch.distributions import Distribution, Gamma, Poisson, constraints -from torch.distributions.utils import ( - broadcast_all, - lazy_property, - logits_to_probs, - probs_to_logits, -) - - -def 
log_zinb_positive( - x: torch.Tensor, mu: torch.Tensor, theta: torch.Tensor, pi: torch.Tensor, eps=1e-8 -): - """ - Log likelihood (scalar) of a minibatch according to a zinb model. - Parameters - ---------- - x - Data - mu - mean of the negative binomial (has to be positive support) (shape: minibatch x vars) - theta - inverse dispersion parameter (has to be positive support) (shape: minibatch x vars) - pi - logit of the dropout parameter (real support) (shape: minibatch x vars) - eps - numerical stability constant - Notes - ----- - We parametrize the bernoulli using the logits, hence the softplus functions appearing. - """ - # theta is the dispersion rate. If .ndimension() == 1, it is shared for all cells (regardless of batch or labels) - if theta.ndimension() == 1: - theta = theta.view( - 1, theta.size(0) - ) # In this case, we reshape theta for broadcasting - - softplus_pi = F.softplus(-pi) # uses log(sigmoid(x)) = -softplus(-x) - log_theta_eps = torch.log(theta + eps) - log_theta_mu_eps = torch.log(theta + mu + eps) - pi_theta_log = -pi + theta * (log_theta_eps - log_theta_mu_eps) - - case_zero = F.softplus(pi_theta_log) - softplus_pi - mul_case_zero = torch.mul((x < eps).type(torch.float32), case_zero) - - case_non_zero = ( - -softplus_pi - + pi_theta_log - + x * (torch.log(mu + eps) - log_theta_mu_eps) - + torch.lgamma(x + theta) - - torch.lgamma(theta) - - torch.lgamma(x + 1) - ) - mul_case_non_zero = torch.mul((x > eps).type(torch.float32), case_non_zero) - - res = mul_case_zero + mul_case_non_zero - - return res - - -def log_nb_positive(x: torch.Tensor, mu: torch.Tensor, theta: torch.Tensor, eps=1e-8): - """ - Log likelihood (scalar) of a minibatch according to a nb model. - Parameters - ---------- - x - data - mu - mean of the negative binomial (has to be positive support) (shape: minibatch x vars) - theta - inverse dispersion parameter (has to be positive support) (shape: minibatch x vars) - eps - numerical stability constant - Notes - ----- - We parametrize the bernoulli using the logits, hence the softplus functions appearing. - """ - if theta.ndimension() == 1: - theta = theta.view( - 1, theta.size(0) - ) # In this case, we reshape theta for broadcasting - - log_theta_mu_eps = torch.log(theta + mu + eps) - - res = ( - theta * (torch.log(theta + eps) - log_theta_mu_eps) - + x * (torch.log(mu + eps) - log_theta_mu_eps) - + torch.lgamma(x + theta) - - torch.lgamma(theta) - - torch.lgamma(x + 1) - ) - - return res - - -def log_mixture_nb( - x: torch.Tensor, - mu_1: torch.Tensor, - mu_2: torch.Tensor, - theta_1: torch.Tensor, - theta_2: torch.Tensor, - pi_logits: torch.Tensor, - eps=1e-8, -): - """ - Log likelihood (scalar) of a minibatch according to a mixture nb model. - pi_logits is the probability (logits) to be in the first component. - For totalVI, the first component should be background. - Parameters - ---------- - x - Observed data - mu_1 - Mean of the first negative binomial component (has to be positive support) (shape: minibatch x features) - mu_2 - Mean of the second negative binomial (has to be positive support) (shape: minibatch x features) - theta_1 - First inverse dispersion parameter (has to be positive support) (shape: minibatch x features) - theta_2 - Second inverse dispersion parameter (has to be positive support) (shape: minibatch x features) - If None, assume one shared inverse dispersion parameter. 
- pi_logits - Probability of belonging to mixture component 1 (logits scale) - eps - Numerical stability constant - """ - if theta_2 is not None: - log_nb_1 = log_nb_positive(x, mu_1, theta_1) - log_nb_2 = log_nb_positive(x, mu_2, theta_2) - # this is intended to reduce repeated computations - else: - theta = theta_1 - if theta.ndimension() == 1: - theta = theta.view( - 1, theta.size(0) - ) # In this case, we reshape theta for broadcasting - - log_theta_mu_1_eps = torch.log(theta + mu_1 + eps) - log_theta_mu_2_eps = torch.log(theta + mu_2 + eps) - lgamma_x_theta = torch.lgamma(x + theta) - lgamma_theta = torch.lgamma(theta) - lgamma_x_plus_1 = torch.lgamma(x + 1) - - log_nb_1 = ( - theta * (torch.log(theta + eps) - log_theta_mu_1_eps) - + x * (torch.log(mu_1 + eps) - log_theta_mu_1_eps) - + lgamma_x_theta - - lgamma_theta - - lgamma_x_plus_1 - ) - log_nb_2 = ( - theta * (torch.log(theta + eps) - log_theta_mu_2_eps) - + x * (torch.log(mu_2 + eps) - log_theta_mu_2_eps) - + lgamma_x_theta - - lgamma_theta - - lgamma_x_plus_1 - ) - - logsumexp = torch.logsumexp(torch.stack((log_nb_1, log_nb_2 - pi_logits)), dim=0) - softplus_pi = F.softplus(-pi_logits) - - log_mixture_nb = logsumexp - softplus_pi - - return log_mixture_nb - - -def _convert_mean_disp_to_counts_logits(mu, theta, eps=1e-6): - r""" - NB parameterizations conversion. - Parameters - ---------- - mu - mean of the NB distribution. - theta - inverse overdispersion. - eps - constant used for numerical log stability. (Default value = 1e-6) - Returns - ------- - type - the number of failures until the experiment is stopped - and the success probability. - """ - if not (mu is None) == (theta is None): - raise ValueError( - "If using the mu/theta NB parameterization, both parameters must be specified" - ) - logits = (mu + eps).log() - (theta + eps).log() - total_count = theta - return total_count, logits - - -def _convert_counts_logits_to_mean_disp(total_count, logits): - """ - NB parameterizations conversion. - Parameters - ---------- - total_count - Number of failures until the experiment is stopped. - logits - success logits. - Returns - ------- - type - the mean and inverse overdispersion of the NB distribution. - """ - theta = total_count - mu = logits.exp() * theta - return mu, theta - - -def _gamma(theta, mu): - concentration = theta - rate = theta / mu - # Important remark: Gamma is parametrized by the rate = 1/scale! - gamma_d = Gamma(concentration=concentration, rate=rate) - return gamma_d - - -class NegativeBinomial(Distribution): - r""" - Negative binomial distribution. - One of the following parameterizations must be provided: - (1), (`total_count`, `probs`) where `total_count` is the number of failures until - the experiment is stopped and `probs` the success probability. (2), (`mu`, `theta`) - parameterization, which is the one used by scvi-tools. These parameters respectively - control the mean and inverse dispersion of the distribution. - In the (`mu`, `theta`) parameterization, samples from the negative binomial are generated as follows: - 1. :math:`w \sim \textrm{Gamma}(\underbrace{\theta}_{\text{shape}}, \underbrace{\theta/\mu}_{\text{rate}})` - 2. :math:`x \sim \textrm{Poisson}(w)` - Parameters - ---------- - total_count - Number of failures until the experiment is stopped. - probs - The success probability. - mu - Mean of the distribution. - theta - Inverse dispersion. 
- validate_args - Raise ValueError if arguments do not match constraints - """ - - arg_constraints = { - "mu": constraints.greater_than_eq(0), - "theta": constraints.greater_than_eq(0), - } - support = constraints.nonnegative_integer - - def __init__( - self, - total_count: Optional[torch.Tensor] = None, - probs: Optional[torch.Tensor] = None, - logits: Optional[torch.Tensor] = None, - mu: Optional[torch.Tensor] = None, - theta: Optional[torch.Tensor] = None, - validate_args: bool = False, - ): - self._eps = 1e-8 - if (mu is None) == (total_count is None): - raise ValueError( - "Please use one of the two possible parameterizations. Refer to the documentation for more information." - ) - - using_param_1 = total_count is not None and ( - logits is not None or probs is not None - ) - if using_param_1: - logits = logits if logits is not None else probs_to_logits(probs) - total_count = total_count.type_as(logits) - total_count, logits = broadcast_all(total_count, logits) - mu, theta = _convert_counts_logits_to_mean_disp(total_count, logits) - else: - mu, theta = broadcast_all(mu, theta) - self.mu = mu - self.theta = theta - super().__init__(validate_args=validate_args) - - @property - def mean(self): - return self.mu - - @property - def variance(self): - return self.mean + (self.mean ** 2) / self.theta - - def sample( - self, sample_shape: Union[torch.Size, Tuple] = torch.Size() - ) -> torch.Tensor: - with torch.no_grad(): - gamma_d = self._gamma() - p_means = gamma_d.sample(sample_shape) - - # Clamping as distributions objects can have buggy behaviors when - # their parameters are too high - l_train = torch.clamp(p_means, max=1e8) - counts = Poisson( - l_train - ).sample() # Shape : (n_samples, n_cells_batch, n_vars) - return counts - - def log_prob(self, value: torch.Tensor) -> torch.Tensor: - if self._validate_args: - try: - self._validate_sample(value) - except ValueError: - warnings.warn( - "The value argument must be within the support of the distribution", - UserWarning, - ) - - return log_nb_positive(value, mu=self.mu, theta=self.theta, eps=self._eps) - - def _gamma(self): - return _gamma(self.theta, self.mu) - - -class ZeroInflatedNegativeBinomial(NegativeBinomial): - r""" - Zero-inflated negative binomial distribution. - One of the following parameterizations must be provided: - (1), (`total_count`, `probs`) where `total_count` is the number of failures until - the experiment is stopped and `probs` the success probability. (2), (`mu`, `theta`) - parameterization, which is the one used by scvi-tools. These parameters respectively - control the mean and inverse dispersion of the distribution. - In the (`mu`, `theta`) parameterization, samples from the negative binomial are generated as follows: - 1. :math:`w \sim \textrm{Gamma}(\underbrace{\theta}_{\text{shape}}, \underbrace{\theta/\mu}_{\text{rate}})` - 2. :math:`x \sim \textrm{Poisson}(w)` - Parameters - ---------- - total_count - Number of failures until the experiment is stopped. - probs - The success probability. - mu - Mean of the distribution. - theta - Inverse dispersion. - zi_logits - Logits scale of zero inflation probability. 
- validate_args - Raise ValueError if arguments do not match constraints - """ - - arg_constraints = { - "mu": constraints.greater_than_eq(0), - "theta": constraints.greater_than_eq(0), - "zi_probs": constraints.half_open_interval(0.0, 1.0), - "zi_logits": constraints.real, - } - support = constraints.nonnegative_integer - - def __init__( - self, - total_count: Optional[torch.Tensor] = None, - probs: Optional[torch.Tensor] = None, - logits: Optional[torch.Tensor] = None, - mu: Optional[torch.Tensor] = None, - theta: Optional[torch.Tensor] = None, - zi_logits: Optional[torch.Tensor] = None, - validate_args: bool = False, - ): - - super().__init__( - total_count=total_count, - probs=probs, - logits=logits, - mu=mu, - theta=theta, - validate_args=validate_args, - ) - self.zi_logits, self.mu, self.theta = broadcast_all( - zi_logits, self.mu, self.theta - ) - - @property - def mean(self): - pi = self.zi_probs - return (1 - pi) * self.mu - - @property - def variance(self): - raise NotImplementedError - - @lazy_property - def zi_logits(self) -> torch.Tensor: - return probs_to_logits(self.zi_probs, is_binary=True) - - @lazy_property - def zi_probs(self) -> torch.Tensor: - return logits_to_probs(self.zi_logits, is_binary=True) - - def sample( - self, sample_shape: Union[torch.Size, Tuple] = torch.Size() - ) -> torch.Tensor: - with torch.no_grad(): - samp = super().sample(sample_shape=sample_shape) - is_zero = torch.rand_like(samp) <= self.zi_probs - samp[is_zero] = 0.0 - return samp - - def log_prob(self, value: torch.Tensor) -> torch.Tensor: - try: - self._validate_sample(value) - except ValueError: - warnings.warn( - "The value argument must be within the support of the distribution", - UserWarning, - ) - return log_zinb_positive(value, self.mu, self.theta, self.zi_logits, eps=1e-08) diff --git a/build/lib/scnym/interpret.py b/build/lib/scnym/interpret.py deleted file mode 100644 index 70c9c98..0000000 --- a/build/lib/scnym/interpret.py +++ /dev/null @@ -1,1368 +0,0 @@ -"""Tools for interpreting trained scNym models""" -import torch -import torch.nn as nn -import torch.nn.functional as F -import numpy as np -import pandas as pd -from scipy import sparse -import anndata - -# self -from .utils import build_classification_matrix, get_adata_asarray -from . import dataprep -from . import attributionpriors as attrprior - -# stdlib -import typing -import copy -import warnings -import logging -import time -from pathlib import Path - -logger = logging.getLogger(__name__) - - -class Salience(object): - """ - Performs backpropogation to compute gradients on a target - class with regards to an input. - - Notes - ----- - Saliency analysis computes a gradient on a target class - score :math:`f_i(x)` with regards to some input :math:`x`. - - - .. math:: - - S_i = \frac{\partial f_i(x)}{\partial x} - """ - - def __init__( - self, - model: nn.Module, - class_names: np.ndarray, - gene_names: np.ndarray = None, - layer_to_hook: int = None, - verbose: bool = False, - ) -> None: - """ - Performs backpropogation to compute gradients on a target - class with regards to an input. - - Parameters - ---------- - model : torch.nn.Module - trained scNym model. - class_names : np.ndarray - list of str names matching output nodes in `model`. - gene_names : np.ndarray, optional - gene names for the model. - layer_to_hook : int - index of the layer from which to record gradients. - defaults to the gene level input features. - - Returns - ------- - None. 
- """ - # ensure class names are unique for each output node - if len(np.unique(class_names)) != len(class_names): - msg = "`class_names` must all be unique." - raise ValueError(msg) - - self.class_names = np.array(class_names) - self.n_classes = len(class_names) - self.verbose = verbose - - # load model into CUDA compute if available - if torch.cuda.is_available(): - self.model = model.cuda() - else: - self.model = model - # ensure we're not in training mode - self.model = self.model.eval() - - self.gene_names = gene_names - - if layer_to_hook is None: - self._hook_first_layer_gradients() - else: - self._hook_nth_layer_gradients(n=layer_to_hook) - return - - def _hook_first_layer_gradients(self): - """Set up hooks to record gradients from the first linear - layer into a target tensor. - - References - ---------- - https://pytorch.org/docs/stable/nn.html#torch.nn.Module.register_backward_hook - """ - - def _record_gradients(module, grad_in, grad_out): - """Record gradients of a layer with the correct input - shape""" - self.gradients = grad_in[1] - if self.verbose: - print([x.size() if x is not None else "None" for x in grad_in]) - print("Hooked gradients to: ", module) - - for module in self.model.modules(): - if isinstance(module, nn.Linear) and module.in_features == len( - self.gene_names - ): - module.register_backward_hook(_record_gradients) - return - - def _hook_nth_layer_gradients(self, n: int): - """Set up hooks to record gradients from an arbitrary layer. - - References - ---------- - https://pytorch.org/docs/stable/nn.html#torch.nn.Module.register_backward_hook - """ - - def _record_gradients(module, grad_in, grad_out): - """Record gradients of a layer with the correct input - shape""" - self.gradients = grad_in[1] - if self.verbose: - print([x.size() if x is not None else "None" for x in grad_in]) - print("Hooked gradients to: ", module) - - module = list(self.model.modules())[n] - module.register_backward_hook(_record_gradients) - return - - def _guided_backprop_hooks(self): - """Set up forward and backward hook functions to perform - "Guided backpropogation" - - Notes - ----- - Guided backpropogation only passes positive gradients upward through the network. - - Normal backprop: - - .. math:: - - f_i^{(l + 1)} = ReLU(f_i^{(l)}) - - R_i^{(l)} = (f_i^{(l)} > 0) \cdot R_i^{(l+1)} - - where - - .. math:: - - R_i^{(l + 1)} = \frac{\partial f_{out}}{\partial f_i^{l + 1}} - - - By contrast, guided backpropogation only passes gradient values where both - the activates :math:`f_i^{(l)}` and the gradients :math:`R_i^{(l + 1)}` are - greater than :math:`0`. - - - References - ---------- - https://arxiv.org/pdf/1412.6806.pdf - - https://pytorch.org/docs/stable/nn.html#torch.nn.Module.register_forward_hook - https://pytorch.org/docs/stable/nn.html#torch.nn.Module.register_backward_hook - """ - - def _record_relu_outputs(module, in_, out_): - """Store the outputs to each ReLU layer""" - self.rectified_outputs.append( - out_, - ) - self.store_rectified_outputs.append( - out_, - ) - - def _clamp_grad(module, grad_in, grad_out): - """Clamp ReLU gradients to [0, inf] and return a - new gradient to be used in subsequent outputs. 
- """ - self.store_grad.append(grad_in[0]) - - grad = grad_in[0].clamp(min=0.0) - self.store_clamped_grad.append(grad) - - # here we pop the outputs off to ensure that the - # final output is always the current ReLU layer - # we're investigating - last_relu_output = self.rectified_outputs.pop() - last_relu_output = copy.copy(last_relu_output) - last_relu_output[last_relu_output > 0] = 1 - rectified_grad = last_relu_output * grad - - self.store_rectified_grad.append(rectified_grad) - return (rectified_grad,) - - self.store_rectified_outputs = [] - self.store_grad = [] - self.store_clamped_grad = [] - - for _, module in self.model.named_modules(): - if isinstance(module, nn.ReLU): - module.register_forward_hook(_record_relu_outputs) - module.register_backward_hook(_clamp_grad) - - return - - def get_saliency( - self, - x: torch.FloatTensor, - target_class: str, - guide_backprop: bool = False, - ) -> torch.FloatTensor: - """Compute the saliency of a target class on an input - vector `x`. - - Parameters - ---------- - x : torch.FloatTensor - [1, Genes] vector of gene expression. - target_class : str - class in `.class_names` for which to compute gradients. - guide_backprop : bool - perform "guided backpropogation" by clamping gradients - to only positive values at each ReLU. - see: https://arxiv.org/pdf/1412.6806.pdf - - Returns - ------- - salience : torch.FloatTensor - gradients on `target_class` with respect to `x`. - """ - if target_class not in self.class_names: - msg = f"{target_class} is not in `.class_names`" - raise ValueError(msg) - - target_idx = np.where(target_class == self.class_names)[0].astype(np.int) - target_idx = int(target_idx) - - self.model.zero_grad() - - if guide_backprop: - self.rectified_outputs = [] - self.store_rectified_grad = [] - self._guided_backprop_hooks() - - # store gradients on the input - if torch.cuda.is_available(): - x = x.cuda() - x.requires_grad = True - - # module hook will record gradients here - self.gradients = torch.zeros_like(x) - - # forward pass - output = self.model(x) - - # create a [N, C] tensor to store gradients - target = torch.zeros_like(output) - # set the target class to `1`, creating a one-hot - # of the target class - target[:, target_idx] = 1 - - # compute gradients with backprop - output.backward( - gradient=target, - ) - - # detach from the graph and move to main memory - target = target.detach().cpu() - - return self.gradients - - def rank_genes_by_saliency( - self, - **kwargs, - ) -> np.ndarray: - """ - Rank genes by saliency for a target class and input. - - Passes **kwargs to `.get_saliency` and uses the output - to rank genes. - - Returns - ------- - ranked_genes : np.ndarray - gene names with high saliency, ranked highest to - lowest. - """ - s = self.get_saliency(**kwargs) - sort_idx = torch.argsort(s) - idx = sort_idx[0].numpy()[::-1] - return self.gene_names[idx.astype(np.int)] - - -class IntegratedGradient(object): - def __init__( - self, - model: nn.Module, - class_names: typing.Union[list, np.ndarray], - gene_names: typing.Union[list, np.ndarray] = None, - grad_activation: str = "input", - verbose: bool = False, - ) -> None: - """Performs integrated gradient computations for feature attribution - in scNym models. - - Parameters - ---------- - model : torch.nn.Module - trained scNym model. - class_names : list or np.ndarray - list of str names matching output nodes in `model`. - gene_names : list or np.ndarray, optional - gene names for the model. 
- grad_activation : str - activations where gradients should be collected. - default "input" collects gradients at the level of input features. - verbose : bool - verbose outputs for stdout. - - Returns - ------- - None. - - Notes - ----- - Integrated gradients are computed as the path integral between a "baseline" - gene expression vector (all 0 counts) and an observed gene expression vector. - The path integral is computed along a straight line in the feature space. - - Stated formally, we define a our baseline gene expression vector as :math:`x`, - our observed vector as :math:`x'`, an scnym model :math:`f(\cdot)`, and a - number of steps :math:`M` for approximating the integral by Reimann sums. - - The integrated gradient :math:`\int \nabla` for a feature :math:`x_i` is then - - .. math:: - - r = \sum_{m=1}^M \partial f(x' + \frac{m}{M}(x - x')) / \partial x_i \\ - \int \nabla_i = (x_i' - x_i) \frac{1}{M} r - """ - self.model = copy.deepcopy(model) - if torch.cuda.is_available(): - self.model = self.model.cuda() - print("Model loaded on CUDA compute device.") - self.model.zero_grad() - for param in self.model.parameters(): - param.requires_grad = False - - # get gradients on the specified layer activation if - # the specified layer is not "input" - self.grad_activation = grad_activation - - if grad_activation == "input": - self.get_grad = self._get_grad_input - elif grad_activation == "first_layer": - self.get_grad = self._get_grad_first_layer - self.input2first = nn.Sequential(*list(model.modules())[3:7]) - self.first2output = nn.Sequential(*list(model.modules())[7:]) - else: - msg = f"`grad_activation={grad_activation}` is not implemented." - raise NotImplementedError(msg) - - self.class_names = class_names - self.gene_names = gene_names - self.verbose = verbose - self.grads_for_class = {} - - if type(self.class_names) == np.ndarray: - self.class_names = self.class_names.tolist() - - return - - def _get_grad_input( - self, - x: torch.Tensor, - target_class: str, - ) -> typing.Tuple[torch.Tensor, torch.Tensor]: - """Get the gradient on the observed features with respect - to a target class. - - Parameters - ---------- - x : torch.Tensor - [Batch, Features] input tensor. - target_class : str - target class for gradient computation. - - Returns - ------- - grad : torch.Tensor - [Batch, Features] feature gradients with respect to the - target class. - target : torch.Tensor - [Batch,] value of the target class score. - """ - target_idx = self.class_names.index(target_class) - - # store gradients on the input - if torch.cuda.is_available(): - x = x.cuda() - x.requires_grad = True - - # forward pass through the model - output = self.model(x) - sm_output = F.softmax(output, dim=-1) - - # get the softmax output on the target class for each - # observation as a loss - index = torch.ones(output.size(0)).view(-1, 1) * target_idx - index = index.long() - index = index.to(device=sm_output.device) - # `.gather(dim, index)` takes a dimension number and a tensor - # of indices size [Batch,] where each val is an integer index - # grabs the specific element for each observation along the given dim. 
- target = sm_output.gather(1, index) - - # zero any existing gradients - self.model.zero_grad() - if x.grad is not None: - x.grad.zero_() - target.backward() - - grad = x.grad.detach().cpu() - - return grad, target - - def _catch_grad(self, grad) -> None: - """Hook to catch gradients from an activation - of interest.""" - self.caught_grad = grad.detach() - return - - def _get_grad_first_layer( - self, - x: torch.Tensor, - target_class: str, - ): - """Get the gradient on the first layer activations. - - Parameters - ---------- - x : torch.Tensor - [Batch, Features] input tensor. e.g. first layer - embedding coordinates to pass to the rest of the model. - target_class : str - target class for gradient computation. - - Returns - ------- - grad : torch.Tensor - [Batch, Features] feature gradients with respect to the - target class. - target : torch.Tensor - [Batch,] value of the target class score. - """ - target_idx = self.class_names.index(target_class) - # store gradients on the input - if torch.cuda.is_available(): - x = x.cuda() - x.requires_grad = True - - # forward through the activation embedder - x.register_hook(self._catch_grad) - # forward through to outputs - output = self.first2output(x) - sm_output = F.softmax(output, dim=-1) - - # get the softmax output on the target class for each - # observation as a loss - index = torch.ones(output.size(0)).view(-1, 1) * target_idx - index = index.long() - index = index.to(device=sm_output.device) - # `.gather(dim, index)` takes a dimension number and a tensor - # of indices size [Batch,] where each val is an integer index - # grabs the specific element for each observation along the given dim. - target = sm_output.gather(1, index) - - # zero any existing gradients - self.model.zero_grad() - if x.grad is not None: - x.grad.zero_() - - target.backward() - grad = self.caught_grad - - return grad, target - - def _check_integration( - self, - integrated_grad: torch.Tensor, - ) -> bool: - """Check that the approximation of the path integral is appropriate. - If we used a sufficient number of steps in the Reimann sum, we should - find that the gradient sum is roughly equivalent to the difference in - class scores for the baseline vector and target vector. - """ - score_difference = self.raw_scores[-1] - self.raw_scores[0] - check = torch.isclose( - integrated_grad.sum(), - score_difference, - rtol=0.1, - ) - if not check: - msg = "integrated gradient magnitude does not match the difference in scores.\n" - msg += f"magnitude {integrated_grad.sum().item()} vs. {score_difference.item()}.\n" - msg += "consider using more steps to estimate the path integral." - warnings.warn(msg) - return check - - def get_integrated_gradient( - self, - x: torch.Tensor, - target_class: str, - M: int = 300, - baseline: torch.Tensor = None, - ) -> torch.Tensor: - """Compute the integrated gradient for a single observation. - - Parameters - ---------- - x : torch.Tensor - [Features,] input tensor. - target_class : str - class in `self.class_names` for optimization. - M : int - number of gradient steps to use when approximating - the path integral. - baseline : torch.Tensor - [Features,] baseline gene expression vector to use. - if `None`, uses the `0` vector. - - Returns - ------- - integrated_grad : torch.Tensor - [Features,] integrated gradient tensor. - - Notes - ----- - 1. Define a difference between the baseline input and observation. - 2. Approximate a linear path between the baseline and observation - with `M` steps. - 3. 
Compute the gradient at each step in the path. - 4. Sum gradients across steps and divide by number of steps. - 5. Elementwise multiply with input features as in saliency. - """ - if baseline is None: - n_dims = ( - len(self.gene_names) - if self.grad_activation == "input" - else self.model.n_hidden_init - ) - - if self.verbose: - print("Using the 0-vector as a baseline.") - base = self.baseline_input = torch.zeros((1, n_dims)).float() - else: - base = self.baseline_input = baseline - if base.dim() > 1 and base.size(0) != 1: - msg = "baseline must be a single gene expression vector" - raise ValueError(msg) - base = base.view(1, -1) - - self.target_class = target_class - - if x.dim() > 1 and x.size(0) == 1: - # tensor has an empty batch dimension, flatten it - x = x.view(-1) - - # create a batch of observations where each observation is - # a single step along the path integral - path = base.repeat((M, 1)) - - # if `first_layer` activations are used, x_activ is the relevant - # activation setting for saliency - if self.grad_activation == "first_layer": - x = x.to(device=list(self.input2first.parameters())[0].device) - x_rel = self.input2first(x.view(1, -1)).detach().cpu() - else: - x_rel = x - self.x_rel = x_rel - - # create a tensor marking the "step number" for each observation - step = ((x_rel - base) / M).view(1, -1) - step_coord = torch.arange(1, M + 1).view(-1, 1).repeat((1, path.size(1))) - - # add the correct number of steps to fill the path tensor - path += step * step_coord - - if self.verbose: - print("baseline", base.size()) - print(base.sort()) - print("observation", x.size()) - print(x.sort()) - print() - print("step : ", step.size()) - print(step) - print("step_coord : ", step_coord.size()) - print(step_coord) - print("path : ", path.size()) - print(path[0].sort()) - print("-" * 3) - print(path[-1].sort()) - - # compute the gradient on the input at each step - # along the path - grad_dim = ( - path.size(1) - if self.grad_activation == "input" - else self.model.n_hidden_init - ) - gradients = torch.zeros((path.size(0), grad_dim)) - scores = torch.zeros(path.size(0)) - - for m in range(M): - gradients[m, :], target_scores = self.get_grad( - path[m, :].view(1, -1), - self.target_class, - ) - scores[m] = target_scores - - self.raw_gradients = gradients - self.raw_scores = scores - self.path = path - - # sum gradients and normalize by step number - integrated_grad = x_rel * (gradients.sum(0) / M) - - self._check_integration(integrated_grad) - - return integrated_grad - - def get_gradients_for_class( - self, - adata: anndata.AnnData, - groupby: str, - target_class: str, - reference_class: str = None, - n_cells: int = None, - *args, - **kwargs, - ) -> pd.DataFrame: - """Get integrated gradients for a target class given - an AnnData experiment. - - Parameters - ---------- - adata : anndata.AnnData - [Cells, Features] experiment. - groupby : str - column in `adata.obs` containing class names. - target_class : str - class in `self.class_names` and `adata.obs[groupby]` - for optimization. - reference_class : str - reference class in `self.class_names`. "all" uses all - non-target classes as a reference. - n_cells : int - number of cells to use to compute a characteristic - integrated gradient. - if `None`, uses all cells. - *args, **kwargs : dict - passed to `self.get_integrated_gradient`. - - Returns - ------- - gradients : pd.DataFrame - [Cells, Features] integrated gradients. - Sets `self.grads_for_class[target_class]` with the value - of `gradients`. 
- - See Also - -------- - get_integrated_gradient - """ - if not np.all(adata.var_names == self.gene_names): - # gene names don't match, check if IG names are a subset - shared_genes = np.intersect1d( - adata.var_names, - self.gene_names, - ) - if len(shared_genes) < len(self.gene_names): - # some genes are missing - msg = "Not all genes in `gene_names` were found in `adata`." - raise ValueError(msg) - else: - # subset adata to the gene set used - # this will also handle gene name permutations - adata = adata[:, self.gene_names] - - if groupby not in adata.obs_keys(): - msg = f"{groupby} not in `adata.obs` columns." - raise ValueError(msg) - - groups = np.unique(adata.obs[groupby]) - if target_class not in groups: - msg = f"`{target_class}` is not a class in `{groupby}`" - raise ValueError(msg) - if target_class not in self.class_names: - msg = f"`{target_class}` is not a class in `self.class_names`" - raise ValueError(msg) - - # get the indices for cells of the target class - cell_idx = np.where(adata.obs[groupby] == target_class)[0].astype(np.int) - if n_cells is not None: - if n_cells < len(cell_idx): - # subset if a specific number of cells was specified - cell_idx = np.random.choice( - cell_idx, - size=n_cells, - replace=False, - ) - msg = f"Using {n_cells} cells for integrated gradient analysis." - logger.debug(msg) - else: - msg = f"n_cells {n_cells} > n_cells_in_class {len(cell_idx)}.\n" - msg += "Using all available cells." - logger.warning(msg) - - # compute integrated gradients - grads = [] - for i, idx in enumerate(cell_idx): - x = adata.X[idx, :] - if type(x) == np.matrix: - x = np.array(x) - if type(x) == sparse.csr_matrix: - x = x.toarray() - if type(x) != np.ndarray: - msg = "gene vector was not coerced to np.ndarray" - raise TypeError(msg) - x = x.flatten() - x = torch.from_numpy(x).float() - - g = self.get_integrated_gradient( - x=x, - target_class=target_class, - *args, - **kwargs, - ) - grads.append(g.view(-1)) - - logger.debug(f"x size: {x.size()}") - logger.debug(f"g size: {g.size()}") - - G = torch.stack(grads, dim=0).cpu().numpy() - - if self.grad_activation == "input": - col_names = self.gene_names - else: - col_names = [f"z_{i}" for i in range(G.shape[1])] - - gradients = pd.DataFrame( - G, - columns=col_names, - index=adata.obs_names[cell_idx], - ) - - self.grads_for_class[target_class] = gradients - - return gradients - - def get_top_features_from_gradients( - self, - target_class: str = None, - gradients: pd.DataFrame = None, - ) -> np.ndarray: - """Get the top features from a set of pre-computed integrated - gradients. - - Parameters - ---------- - target_class : str - target class with gradients stored in `self.grads_for_class[target_class]`. - gradients : pd.DataFrame - [Cells, Features] integrated gradients to use. If provided, supercedes - `target_class`. - - Returns - ------- - top_features : np.ndarray - [Features,] sorted [High, Low] values. - i.e. `top_features[0]` is the top feature. 
- """ - if target_class is None and gradients is None: - raise ValueError("must provide `gradients` or `target_class`") - - # `if gradients is not None`, use gradients instead of - # the stored gradients regardless of whether or not - # target_class as provided - if gradients is None: - gradients = self.grads_for_class[target_class] - logger.debug(f"Using stored gradients for {target_class}") - - grad_means = gradients.mean(0) - sort_idx = np.argsort(grad_means)[::-1] # high to low - - top_features = self.gene_names[sort_idx] - return top_features - - -class ExpectedGradient(object): - def __init__( - self, - model: nn.Module, - class_names: typing.Union[list, np.ndarray], - gene_names: typing.Union[list, np.ndarray] = None, - verbose: bool = False, - ) -> None: - """Performs expected gradient computations for feature attribution - in scNym models. - - Parameters - ---------- - model : torch.nn.Module - trained scNym model. - class_names : list or np.ndarray - list of str names matching output nodes in `model`. - gene_names : list or np.ndarray, optional - gene names for the model. - verbose : bool - verbose outputs for stdout. - - Returns - ------- - None. - - Notes - ----- - Integrated gradients are computed as the path integral between a "baseline" - gene expression vector (all 0 counts) and an observed gene expression vector. - The path integral is computed along a straight line in the feature space. - - Stated formally, we define a our baseline gene expression vector as :math:`x`, - our observed vector as :math:`x'`, an scnym model :math:`f(\cdot)`, and a - number of steps :math:`M` for approximating the integral by Reimann sums. - - The integrated gradient :math:`\int \nabla` for a feature :math:`x_i` is then - - .. math:: - - r = \sum_{m=1}^M \partial f(x' + \frac{m}{M}(x - x')) / \partial x_i \\ - \int \nabla_i = (x_i' - x_i) \frac{1}{M} r - """ - self.model = model - if torch.cuda.is_available(): - self.model = self.model.cuda() - logger.info("Model loaded on CUDA device for E[Grad] estimation.") - self.model.zero_grad() - for param in self.model.parameters(): - param.requires_grad = False - - self.model_device = list(self.model.parameters())[0].device - - self.class_names = np.array(class_names) - self.gene_names = np.array(gene_names) - self.verbose = verbose - self.grads_for_class = {} - # define the values for `source` that will trigger using all data as the - # reference dataset - self.background_vals = ( - "all", - None, - ) - - return - - def _check_inputs( - self, - adata: anndata.AnnData, - source: str, - target: str, - cell_type_col: str, - ) -> anndata.AnnData: - """Check that inputs match model expectations. - - Parameters - ---------- - adata : anndata.AnnData - [Cells, Genes] - source : str - class name for source class. - target : str - class name for target class. - cell_type_col : str - column in `adata.obs` containing cell type labels. - - Returns - ------- - adata : anndata.AnnData - [Cells, len(self.gene_names)] experiment. - modifies anndata to match model training gene names - if necessary. - """ - # check cell type arguments - if cell_type_col not in adata.obs.columns: - msg = f"{cell_type_col} is not a column in `adata.obs`" - raise ValueError(msg) - self.cell_type_col = cell_type_col - - cell_types = np.unique(adata.obs[self.cell_type_col]) - if source not in cell_types and source not in self.background_vals: - msg = f"{source} not in the detected cell types or background values." 
- raise ValueError(msg) - if target not in cell_types: - msg = f"{target} not in the detected cell types." - raise ValueError(msg) - - # check that genes match the training gene names - match = np.all(np.array(adata.var_names) == np.array(self.gene_names)) - if not match: - msg = "Gene names for model and `adata` query do not match.\n" - msg += "\t Coercing..." - logger.warn(msg) - X = build_classification_matrix( - X=get_adata_asarray( - adata, - ), - model_genes=np.array(self.gene_names), - sample_genes=np.array(adata.var_names), - gene_batch_size=1024, - ) - adata2 = anndata.AnnData( - X=X, - obs=adata.obs.copy(), - ) - adata2.var_names = self.gene_names - else: - logger.debug("Model and query gene names match.") - adata2 = adata - - return adata2 - - def _get_exp_grad( - self, - model: torch.nn.Module, - input_: torch.FloatTensor, - target: torch.LongTensor, - ) -> torch.FloatTensor: - """Get expected gradients from the input layer""" - exp_grad = self.APExp.shap_values( - model, - input_, - sparse_labels=target, - ) - return exp_grad - - def _setup_dataset(self, X, y, adata=None) -> None: - """Setup `Dataset` and `DataLoader`classes for train - and validation data. - - Returns - ------- - None. - Sets `.train_ds`, `.val_ds` and `.train_dl`, `.val_dl`. - """ - self.n_cell_types = len(np.unique(y)) - self.n_genes = X.shape[1] - - self.y_orig = y - y = np.array(pd.Categorical(y, categories=np.unique(y)).codes) - self.y = y - self.y_categories = np.unique(self.y_orig) - # setup dataset, model, and training components - # for querying, we also set a dataset with all of the data - self.all_ds = dataprep.SingleCellDS( - X=X, - y=np.array(y), - ) - self.all_dl = torch.utils.data.DataLoader( - self.all_ds, - batch_size=self.batch_size, - shuffle=False, - drop_last=False, - ) - - return - - def query( - self, - adata: anndata.AnnData, - target: str, - source: str = "all", - cell_type_col: str = "cell_ontology_class", - batch_size: int = 512, - n_batches: int = 100, - n_cells: int = None, - ) -> pd.DataFrame: - """Find the features that distinguish `target` cells from `source` cells - using expected gradient estimation. - - Parameters - ---------- - adata : anndata.AnnData - [Cells, Genes] - target : str - class name for target class. - expected gradients highlight important features that define this cell type. - source : str - class name for source class to use as reference cells for expected - gradient estimation. - if `"all"` or `None`, uses all cells in `adata` as possible references. - cell_type_col : str - column in `adata.obs` containing cell type labels. - n_batches : int - number of reference batches to draw for each target sample. - n_cells : int - number of target samples to use for E[G] estimation. - if `None`, uses all available samples. - - Returns - ------- - saliency : pd.DataFrame - [Genes, 1] mean expected gradient across cells used for - estimation for each gene. 
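    Examples
    --------
    Illustrative sketch only; `adata`, the cell type column, and the
    class names below are hypothetical placeholders.

    >>> eg = ExpectedGradient(
    ...     model=trained_model,
    ...     class_names=class_names,
    ...     gene_names=adata.var_names,
    ... )
    >>> saliency = eg.query(
    ...     adata,
    ...     target="B cell",
    ...     source="all",
    ...     cell_type_col="cell_ontology_class",
    ...     n_batches=100,
    ... )
    >>> saliency.head()  # genes ranked by mean expected gradient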
- """ - self.batch_size = batch_size - adata = self._check_inputs( - adata=adata, - source=source, - target=target, - cell_type_col=cell_type_col, - ) - self._setup_dataset(adata.X, adata.obs[cell_type_col], adata=adata) - self.model.train(False) - - target_bidx = adata.obs[self.cell_type_col] == target - if source in self.background_vals: - source_bidx = np.ones(adata.shape[0], dtype=np.bool) - # ensure target cells aren't in the source data - source_bidx[target_bidx] = False - else: - source_bidx = adata.obs[self.cell_type_col] == source - # regenerate labels in case the query dataset is different from the - # training dataset - class_names = self.class_names.tolist() - target_y = np.array( - [class_names.index(target)] * sum(target_bidx), - dtype=np.int32, - ) - source_y = adata.obs.loc[source_bidx, self.cell_type_col].tolist() - source_y = np.array( - [class_names.index(x) for x in source_y], - dtype=np.int32, - ) - - source_adata = adata[source_bidx, :].copy() - target_adata = adata[target_bidx, :].copy() - logging.info(f"Subset adata to {target_adata.shape[0]} target cells.") - - if n_cells is not None: - target_idx = np.random.choice( - np.arange(target_adata.shape[0]), - size=n_cells, - replace=target_adata.shape[0] < n_cells, - ).astype(np.int32) - else: - target_idx = np.arange(target_adata.shape[0]) - - target_ds = dataprep.SingleCellDS( - X=target_adata.X[target_idx], - y=target_y[target_idx], - ) - logging.info( - f"Using {target_ds.X.shape[0]} target cells for expgrad estimation." - ) - # save the cell indices in attributes - self._query_cell_obs_names = pd.DataFrame( - { - "names": source_adata.obs_names.tolist() - + target_adata.obs_names[target_idx].tolist(), - "dataset": ["source"] * source_adata.shape[0] - + ["target"] * len(target_idx), - }, - ) - - # make sure the source dataset has at least as many examples as - # the target by replicating at random - n_reps = int(np.ceil(sum(target_bidx) / sum(source_bidx))) - source_indices = np.arange(source_adata.X.shape[0]) - source_indices = np.tile(source_indices, (n_reps,)) - source_ds = dataprep.SingleCellDS( - X=source_adata.X[source_indices], - y=source_y[source_indices], - ) - - batch_size = min(self.batch_size, len(target_idx)) - target_dl = torch.utils.data.DataLoader( - target_ds, - batch_size=batch_size, - shuffle=False, - drop_last=self.batch_size == batch_size, - ) - - # use only the source samples as references if specified - # otherwise, use the whole training set - self.APExp = attrprior.AttributionPriorExplainer( - source_ds, - batch_size=batch_size, - k=1, - input_batch_index="input", - ) - logger.debug("Set up Attribution Prior Explainer") - gradients_by_batch = [] - for input_batch in target_dl: - batch_grads = [] - input_ = input_batch["input"].to(device=self.model_device) - _, labels = torch.max(input_batch["output"], dim=1) - labels = labels.to(device=self.model_device).long() - # for each input, use `n_batches` different random references - for i in range(n_batches): - s = time.time() - logger.debug(f"gradient batch {i}") - g = self._get_exp_grad( - self.model, - input_, - target=labels, - ) - g = g.detach() - batch_grads.append(g.detach().cpu()) - e = time.time() - logger.debug(f"time: {e-s} secs") - # [Obs, Features, estimation_batch] - batch_grads = torch.stack(batch_grads, dim=-1) - batch_grads = torch.mean(batch_grads, dim=-1) - - gradients_by_batch.append(batch_grads) - gradients = torch.cat(gradients_by_batch, dim=0) - gradients = gradients.detach().cpu().numpy() - - gradients = pd.DataFrame( - 
gradients, - index=target_adata.obs_names[target_idx][: gradients.shape[0]], - ) - if gradients.shape[1] == len(adata.var_names): - gradients.columns = adata.var_names.tolist() - - # compute mean gradients across cells - saliency = gradients.mean(0).sort_values(ascending=False) - saliency.columns = ["exp_grad"] - - self.saliency = saliency - self.gradients = gradients - return saliency - - def save_query( - self, - path: str, - ) -> None: - """Save intermediary representations generated during a - `query` call""" - if path is None: - return - # save query outputs - saliency_path = str(Path(path) / Path("saliency.csv")) - self.saliency.to_csv(saliency_path) - gradients_path = str(Path(path) / Path("gradients.csv")) - self.gradients.to_csv(gradients_path) - obs_names_path = str(Path(path) / Path("obs_names.csv")) - self._query_cell_obs_names.to_csv(obs_names_path) - return - - -class ClassificationEntropy(object): - def __init__(self, reduce: str = "mean") -> None: - """Compute the entropy of a classification probability vector""" - self.reduce = reduce - return - - def __call__(self, x: torch.FloatTensor) -> torch.FloatTensor: - """Compute entropy for a probability tensor `x` - - Parameters - ---------- - x : torch.FloatTensor - [Cells, Classes] probability tensor - - Returns - ------- - H : torch.FloatTensor - either [Cells,] or [1,] if `reduce is not None`. - """ - H = -1 * torch.sum(x * torch.log(x), dim=1) - if self.reduce == "mean": - H = torch.mean(H) - return H - - -class Tesseract(IntegratedGradient): - """Tessaract finds a path from a source vector in feature - space to a destination vector. - - Attributes - ---------- - model : torch.nn.Module - trained scNym model. - class_names : list or np.ndarray - list of str names matching output nodes in `model`. - gene_names : list or np.ndarray, optional - gene names for the model. - grad_activation : str - activations where gradients should be collected. - default "input" collects gradients at the level of input features. - verbose : bool - verbose outputs for stdout. - energy_criterion : Callable - criterion to evaluate the potential energy of a gene - expression state given args `model` and `x` where - `x` is a gene expression vector. - optimizer : torch.optim.Optimizer - optimizer for finding paths through gene expression - space using a parametric gene expression vector. - """ - - def __init__( - self, - *, - energy_criterion: typing.Callable, - optimizer_class: typing.Callable, - **kwargs, - ) -> None: - """Tessaract finds a path from a source vector in feature - space to a destination vector that maximizes the likelihood - of observing each intermediate position using a trained - classification model. - - Parameters - ---------- - energy_criterion : Callable - criterion to evaluate the potential energy of a gene - expression state given args `model` and `x` where - `x` is a gene expression vector. - optimizer_class : Callable - function to initialize a `torch.optim.Optimizer`. - - Returns - ------- - None - """ - super(Tesseract, self).__init__(**kwargs) - self.energy_criterion = energy_criterion - self.optimizer_class = optimizer_class - return - - def find_path( - self, - adata: anndata.AnnData, - groupby: str, - source_class: str, - target_class: str, - energy_weight: float = 1.0, - n_epochs: int = 500, - tol: float = 1.0, - patience: int = 10, - ) -> torch.FloatTensor: - """Find a path between a source and target cell class - given an AnnData experiment containing both. 
- - Parameters - ---------- - adata : anndata.AnnData - [Cells, Features] experiment. - groupby : str - column in `adata.obs` containing class names. - source_class : str - class in `self.class_names` and `adata.obs[groupby]` - for initialization. - target_class : str - class in `self.class_names` and `adata.obs[groupby]` - for optimization. - energy_weight : float, optional - weight for the energy criterion relative to the class - scores. - n_epochs : int, optional - number of epochs for optimization. - tol : float, optional - minimum L2 difference across epochs to consider - the optimization to be progressing. - patience : int, optional - number of epochs to wait before early stopping. - - Returns - ------- - path : torch.FloatTensor - [epochs, Features] path through gene expression space. - Sets `self.last_path` with the value of path. - """ - - source_cell_idx = adata.obs[groupby] == source_class - source_mean = torch.from_numpy( - np.array(adata[source_cell_idx, :].X.mean(0)) - ).float() - model_device = list(self.model.parameters())[0].device - source_mean = source_mean.to(device=model_device) - - if self.grad_activation == "first_layer": - # we're using first layer embeddings as the relevant - # space for integrated gradient computation and - # optimization - source_mean2use = self.input2first(source_mean) - self.scoring_model = self.first2output - else: - source_mean2use = source_mean - self.scoring_model = self.model - - # initialize the gene expression vector at the source - x = copy.deepcopy(source_mean2use) - self.optimizer = self.optimizer_class({"params": x, "name": "expression_path"}) - - # perform optimization to the target class while - # minimizing an energy criterion - def loss( - x, - ): - target_idx = self.class_names.index(target_class) - source_idx = self.class_names.index(source_class) - outputs = self.scoring_model( - x, - ) - probs = torch.nn.functional.softmax(outputs, dim=1) - energy = ( - self.energy_criterion( - x, - ) - * energy_weight - ) - l = (probs[source_idx] - probs[target_idx]) + energy - return l - - # intialize path collector and set the `waiting_epochs` - # for early stopping to an initial zero value - path_points = [] - waiting_epochs = 0 - logger.info("Beginning pathfinding optimization") - for epoch in range(n_epochs): - # save path locations - path_points.append(copy.deepcopy(x.detach().cpu())) - - # compute loss and perform an update step - l = loss( - x, - ) - self.optimizer.zero_grad() - l.backward() - self.optimizer.step() - - # check if x is changing substantially - delta = x.data - path_points[-1] - l2 = torch.norm(delta, p=2) - if l2 < tol and waiting_epochs > patience: - msg = f"\tchange in l2 < {tol} for {patience} epochs\n" - msg += "\tending optimizing." 
- logger.warning(msg) - elif l2 < tol: - waiting_epochs += 1 - else: - waiting_epochs = 0 - - path = torch.cat(path_points, dim=0) - self.last_path = path - return path diff --git a/build/lib/scnym/losses.py b/build/lib/scnym/losses.py deleted file mode 100644 index 8b950a4..0000000 --- a/build/lib/scnym/losses.py +++ /dev/null @@ -1,1838 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F -import logging -from typing import Callable, Union, Iterable, Tuple -from .dataprep import SampleMixUp -from .model import CellTypeCLF, DANN, AE -from .distributions import NegativeBinomial -import copy - -logger = logging.getLogger(__name__) - - -class MultiTaskCriterion(object): - def __init__( - self, - ) -> None: - """Abstraction for MultiTask losses - - Note: Depreceated, inheriting from `torch.nn.Module` now. - """ - return - - def train(self, on: bool) -> None: - """Toggle the training mode of learned parameters""" - return - - def eval( - self, - ) -> None: - """Disable training of learned parameters""" - self.train(on=False) - return - - -def get_class_weight( - y: np.ndarray, -) -> np.ndarray: - """Generate relative class weights based on the representation - of classes in a label vector `y` - - Parameters - ---------- - y : np.ndarray - [N,] vector of class labels. - - Returns - ------- - class_weight : np.ndarray - [Classes,] vector of loss weight coefficients. - if classes are `str`, returns weights in lexographically - sorted order. - - """ - # find all unique class in y and their counts - u_classes, class_counts = np.unique(y, return_counts=True) - # compute class proportions - class_prop = class_counts / len(y) - # invert proportions to get class weights - class_weight = 1.0 / class_prop - # normalize so that the minimum value is 1. - class_weight = class_weight / class_weight.min() - return class_weight - - -def cross_entropy( - pred_: torch.FloatTensor, - label: torch.FloatTensor, - class_weight: torch.FloatTensor = None, - sample_weight: torch.FloatTensor = None, - reduction: str = "mean", -) -> torch.FloatTensor: - """Compute cross entropy loss for prediction outputs - and potentially non-binary targets. - - Parameters - ---------- - pred_ : torch.FloatTensor - [Batch, C] model outputs. - label : torch.FloatTensor - [Batch, C] labels. may not necessarily be one-hot, - but must satisfy simplex criterion. - class_weight : torch.FloatTensor - [C,] relative weights for each of the output classes. - useful for increasing attention to underrepresented - classes. - reduction : str - reduction method across the batch. - - Returns - ------- - loss : torch.FloatTensor - mean cross-entropy loss across the batch indices. - - Notes - ----- - Crossentropy is defined as: - - .. math:: - - H(P, Q) = -\Sum_{k \in K} P(k) log(Q(k)) - - where P, Q are discrete probability distributions defined - with a common support K. 
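    For example, a confident, correct prediction yields a small loss while
    uniform logits yield :math:`\log(C)` (illustrative values only):

    >>> pred_ = torch.tensor([[4.0, 0.0, 0.0]])   # logits
    >>> label = torch.tensor([[1.0, 0.0, 0.0]])   # one-hot target
    >>> cross_entropy(pred_, label)               # approx. 0.036
    >>> cross_entropy(torch.zeros(1, 3), label)   # log(3), approx. 1.099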
- - References - ---------- - See for class weight computation: - https://pytorch.org/docs/stable/nn.html#crossentropyloss - """ - if pred_.size() != label.size(): - msg = ( - f"pred size {pred_.size()} not compatible with label size {label.size()}\n" - ) - raise ValueError(msg) - - if reduction.lower() not in ("mean", "sum", "none"): - raise ValueError(f"{reduction} is not a valid reduction method.") - - # Apply softmax transform to predictions and log transform - pred_log_sm = torch.nn.functional.log_softmax(pred_, dim=1) - # Compute cross-entropy with the label vector - samplewise_loss = -1 * torch.sum(label * pred_log_sm, dim=1) - - if sample_weight is not None: - # weight individual samples using sample_weight - # we squeeze into a single column in-case it had an - # empty singleton dimension - samplewise_loss *= sample_weight.squeeze() - - if class_weight is not None: - class_weight = class_weight.to(label.device) - # weight the losses - # copy the weights across the batch to allow for elementwise - # multiplication with the samplewise losses - class_weight = class_weight.repeat(samplewise_loss.size(0), 1) - # compute an [N,] vector of weights for each samples' loss - weight_vec, _ = torch.max( - class_weight * label, - dim=1, - ) - - samplewise_loss = samplewise_loss * weight_vec - if reduction == "mean": - loss = torch.mean(samplewise_loss) - elif reduction == "sum": - loss = torch.sum(samplewise_loss) - else: - loss = samplewise_loss - return loss - - -class scNymCrossEntropy(nn.Module): - def __init__( - self, - class_weight: torch.FloatTensor = None, - sample_weight: torch.FloatTensor = None, - reduction: str = "mean", - ) -> None: - """Class wrapper for scNym cross-entropy loss to be used - in conjuction with `MultiTaskTrainer` - - Parameters - ---------- - class_weight : torch.FloatTensor - [C,] relative weights for each of the output classes. - useful for increasing attention to underrepresented - classes. - reduction : str - reduction method across the batch. - - See Also - -------- - cross_entropy - .trainer.MultiTaskTrainer - """ - super(scNymCrossEntropy, self).__init__() - - self.class_weight = class_weight - self.sample_weight = sample_weight - self.reduction = reduction - return - - def __call__( - self, - labeled_sample: dict, - unlabeled_sample: dict, - model: nn.Module, - weight: float = None, - ) -> torch.FloatTensor: - """Perform class prediction and compute the supervised loss - - Parameters - ---------- - labeled_sample : dict - input - torch.FloatTensor - [Batch, Features] minibatch of labeled examples. - output - torch.LongTensor - one-hot labels. - unlabeled_sample : dict - input - torch.FloatTensor - [Batch, Features] minibatch of unlabeled samples. - output - torch.LongTensor - zeros. - pass `None` if there are no unlabeled samples. - model : nn.Module - model with parameters accessible via the `.parameters()` - method. - weight : float - default None. no-op, included for API compatibility. 
- - - Returns - ------- - loss : torch.FloatTensor - """ - data = labeled_sample["input"] - # forward pass - outputs, x_embed = model(data, return_embed=True) - probs = torch.nn.functional.softmax(outputs, dim=-1) - _, predictions = torch.max(probs, dim=-1) - - # compute loss - loss = cross_entropy( - pred_=probs, - label=labeled_sample["output"], - sample_weight=self.sample_weight, - class_weight=self.class_weight, - reduction=self.reduction, - ) - - labeled_sample["embed"] = x_embed - - if unlabeled_sample is not None: - outputs, u_embed = model(unlabeled_sample["input"], return_embed=True) - unlabeled_sample["embed"] = u_embed - - return loss - - -class InterpolationConsistencyLoss(nn.Module): - def __init__( - self, - unsup_criterion: Callable, - sup_criterion: Callable, - decay_coef: float = 0.9997, - mean_teacher: bool = True, - augment: Callable = None, - teacher_eval: bool = True, - teacher_bn_running_stats: bool = None, - **kwargs, - ) -> None: - """Computes an Interpolation Consistency Loss - given a trained model and an unlabeled minibatch. - - Parameters - ---------- - unsup_criterion : Callable - loss criterion for similarity between "mixed-up" - "fake labels" and predicted labels for "mixed-up" - samples. - sup_criterion : Callable - loss for samples with a primarily labeled component. - decay_coef : float - decay coefficient for mean teacher parameter - updates. - mean_teacher : bool - use a mean teacher model for interpolation consistency - loss estimation. - augment : Callable - augments a batch of samples. - teacher_eval : bool - place teacher in evaluation mode, deactivating stochastic - model components. - teacher_bn_running_stats : bool - use running statistics for batch normalization mean and - variance. - if False, uses minibatch statistics. - if None, uses setting of the student model batch norm layer. - - Returns - ------- - None. - - Notes - ----- - Instantiates a `SampleMixUp` class and passes any - `**kwargs` to this class. - - Uses a "mean teacher" method by keeping a running - average of parameter sets used in the `__call__` - method. - - `decay_coef` taken from the Mean Teacher paper experiments - on ImageNet. - https://arxiv.org/abs/1703.01780 - - Formalism: - - .. math:: - - icl(u) = criterion( f(Mixup(u_i, u_j)), - Mixup(f(u_i), f(u_j)) ) - - References - ---------- - 1. Interpolation consistency training for semi-supervised learning - 2019, arXiv:1903.03825v3, stat.ML - Vikas Verma, Alex Lamb, Juho Kannala, Yoshua Bengio - - 2. 
Mean teachers are better role models: \ - Weight-averaged consistency targets improve \ - semi-supervised deep learning results - 2017, arXiv:1703.01780, cs.NE - Antti Tarvainen, Harri Valpola - """ - super(InterpolationConsistencyLoss, self).__init__() - - self.unsup_criterion = unsup_criterion - self.sup_criterion = sup_criterion - self.decay_coef = decay_coef - self.mean_teacher = mean_teacher - if self.mean_teacher: - print("IC Loss is using a mean teacher.") - self.augment = augment - self.teacher_eval = teacher_eval - self.teacher_bn_running_stats = teacher_bn_running_stats - - # instantiate a callable MixUp operation - self.mixup_op = SampleMixUp(**kwargs) - - self.teacher = None - self.step = 0 - return - - def _update_teacher( - self, - model: nn.Module, - ) -> None: - """Update the teacher model based on settings""" - if self.mean_teacher: - if self.teacher is None: - # instantiate the teacher with a copy - # of the model - self.teacher = copy.deepcopy( - model, - ) - else: - self._update_teacher_params( - model, - ) - else: - self.teacher = copy.deepcopy( - model, - ) - - if self.teacher_eval: - self.teacher = self.teacher.eval() - - if self.teacher_bn_running_stats is not None: - # enforce our preference for teacher model batch - # normalization statistics - for m in self.teacher.modules(): - if isinstance(m, nn.BatchNorm1d): - m.track_running_stats = self.teacher_bn_running_stats - - # check that our parameters are preserved - if self.teacher_bn_running_stats is not None: - # enforce our preference for teacher model batch - # normalization statistics - for m in self.teacher.modules(): - if isinstance(m, nn.BatchNorm1d): - assert m.track_running_stats == self.teacher_bn_running_stats - - return - - def _update_teacher_params( - self, - model: nn.Module, - ) -> None: - """Update parameters in the teacher model using an - exponential averaging method. - - Notes - ----- - Logic derived from the Mean Teacher implementation - https://github.com/CuriousAI/mean-teacher/ - """ - # Per the mean-teacher paper, we use the global average - # of parameter values until the exponential average is more effective - # For a `decay_coef ~= 0.997`, this hand-off happens at ~step 333. - alpha = min(1 - 1 / (self.step + 1), self.decay_coef) - # Perform in-place operations on the teacher parameters to average - # with the new model parameters - # Here, we're computing a simple weighted average where alpha is - # the weight on past parameters, and (1 - alpha) is the weight on - # new parameters - zipped_params = zip(self.teacher.parameters(), model.parameters()) - for teacher_param, model_param in zipped_params: - (teacher_param.data.mul_(alpha).add_(1 - alpha, model_param.data)) - return - - def __call__( - self, - model: nn.Module, - unlabeled_sample: dict, - labeled_sample: dict, - ) -> torch.FloatTensor: - """Takes a model and set of unlabeled samples as input - and computes the Interpolation Consistency Loss. - - Parameters - ---------- - model : nn.Module - model with parameters accessible via the `.parameters()` - method. - unlabeled_sample : dict - input - torch.FloatTensor - [Batch, Features] minibatch of unlabeled samples. - output - torch.LongTensor - zeros. - labeled_sample : dict - input - torch.FloatTensor - [Batch, Features] minibatch of labeled examples. - output - torch.LongTensor - one-hot labels. - - Returns - ------- - supervised_loss : torch.FloatTensor - supervised loss computed using `sup_criterion` between - model predictions on mixed observations and true labels. 
- unsupervised_loss : torch.FloatTensor - unsupervised loss computed using `criterion` and the - interpolation consistency method. - supervised_outputs : torch.FloatTensor - [Batch, Classes] model outputs for augmented labeled examples. - - - Notes - ----- - Algorithm description: - - (0) Update the mean teacher. - (1) Compute "fake labels" for unlabeled samples by performing - a forward pass through the "mean teacher" and using the output - as a representative label for the sample. - (2) Perform a MixUp operation on unlabeled samples and their - corresponding fake labels. - (3) Compute the loss criterion between the mixed-up fake labels - and the predicted fake labels for the mixed up samples. - """ - ############################### - # (0) Update the mean teacher - ############################### - - self._update_teacher( - model, - ) - - ############################### - # (1) Compute Fake Labels - ############################### - - with torch.no_grad(): - fake_y = F.softmax( - self.teacher(unlabeled_sample["input"]), - dim=1, - ) - - ############################### - # (2) Perform MixUp and Forward - ############################### - - unlabeled_sample["output"] = fake_y - - mixed_sample = self.mixup_op(unlabeled_sample) - # move sample to model device if necessary - mixed_sample["input"] = mixed_sample["input"].to( - device=next(model.parameters()).device, - ) - mixed_output = F.softmax( - model(mixed_sample["input"]), - ) - assert mixed_output.requires_grad - - # set outputs as attributes for later access - self.mixed_output = mixed_output - self.mixed_sample = mixed_sample - self.unlabeled_sample = unlabeled_sample - - ############################### - # (3) Compute unsupervised loss - ############################### - - icl = self.unsup_criterion( - mixed_output, - fake_y, - ) - - ############################### - # (4) Compute supervised loss - ############################### - - if self.augment is not None: - labeled_sample = self.augment(labeled_sample) - # move sample to the model device if necessary - labeled_sample["input"] = labeled_sample["input"].to( - device=next(model.parameters()).device, - ) - labeled_sample["input"].requires_grad = True - - sup_outputs = model(labeled_sample["input"]) - sup_loss = self.sup_criterion( - sup_outputs, - labeled_sample["output"], - ) - - self.step += 1 - return sup_loss, icl, sup_outputs - - -def sharpen_labels( - q: torch.FloatTensor, - T: float = 0.5, -) -> torch.FloatTensor: - """Reduce the entropy of a categorical label using a - temperature adjustment - - Parameters - ---------- - q : torch.FloatTensor - [N, C] pseudolabel. - T : float - temperature parameter. - - Returns - ------- - q_s : torch.FloatTensor - [C,] sharpened pseudolabel. - - Notes - ----- - .. math:: - - S(q, T) = q_i^{1/T} / \sum_j^L q_j^{1/T} - - """ - if T == 0.0: - # equivalent to argmax - _, idx = torch.max(q, dim=1) - oh = torch.nn.functional.one_hot( - idx, - num_classes=q.size(1), - ) - return oh - - if T == 1.0: - # no-op - return q - - q = torch.pow(q, 1.0 / T) - q /= torch.sum( - q, - dim=1, - ).reshape(-1, 1) - return q - - -class MixMatchLoss(InterpolationConsistencyLoss): - """Compute the MixMatch Loss given a batch of labeled - and unlabeled examples. - - Attributes - ---------- - n_augmentations : int - number of augmentated samples to average across when - computing pseudolabels. - default = 2 from MixMatch paper. - T : float - temperature parameter. - augment_pseudolabels : bool - perform augmentations during pseudolabel generation. 
- pseudolabel_min_confidence : float - minimum confidence to compute a loss for a given pseudolabeled - example. examples below this confidence threshold will be given - `0` loss. see the `FixMatch` paper for discussion. - teacher : nn.Module - teacher model for pseudolabeling. - running_confidence_scores : list - [n_batches_to_store,] (torch.Tensor, torch.Tensor,) of unlabeled - example (Confident_Bool, BestConfidenceScore) tuples. - n_batches_to_store : int - determines how many batches to keep in `running_confidence_scores`. - """ - - def __init__( - self, - n_augmentations: int = 2, - T: float = 0.5, - augment_pseudolabels: bool = True, - pseudolabel_min_confidence: float = 0.0, - **kwargs, - ) -> None: - """Compute the MixMatch Loss given a batch of labeled - and unlabeled examples. - - Parameters - ---------- - n_augmentations : int - number of augmentated samples to average across when - computing pseudolabels. - default = 2 from MixMatch paper. - T : float - temperature parameter. - augment_pseudolabels : bool - perform augmentations during pseudolabel generation. - pseudolabel_min_confidence : float - minimum confidence to compute a loss for a given pseudolabeled - example. examples below this confidence threshold will be given - `0` loss. see the `FixMatch` paper for discussion. - - Returns - ------- - None. - - References - ---------- - MixMatch: A Holistic Approach to Semi-Supervised Learning - http://papers.nips.cc/paper/8749-mixmatch-a-holistic-approach-to-semi-supervised-learning - - FixMatch: https://arxiv.org/abs/2001.07685 - """ - # inherit from IC loss, forcing the SampleMixUp to keep - # the identity of the dominant observation in each mixed sample - super(MixMatchLoss, self).__init__( - **kwargs, - keep_dominant_obs=True, - ) - if not callable(self.augment): - msg = "MixMatch requires a Callable for augment" - raise TypeError(msg) - self.n_augmentations = n_augmentations - self.augment_pseudolabels = augment_pseudolabels - self.T = T - - self.pseudolabel_min_confidence = pseudolabel_min_confidence - # keep a running score of the last 50 batches worth of pseudolabel - # confidence outcomes - self.n_batches_to_store = 50 - self.running_confidence_scores = [] - return - - @torch.no_grad() - def _generate_labels( - self, - unlabeled_sample: dict, - ) -> torch.FloatTensor: - """Generate labels by applying a set of augmentations - to each unlabeled example and keeping the mean. - - Parameters - ---------- - unlabeled_batch : dict - "input" - [Batch, Features] minibatch of unlabeled samples. 
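    Returns
    -------
    pseudolabels : torch.FloatTensor
        [Batch, Classes] pseudolabels computed as the mean of softmax
        guesses across `n_augmentations` augmented views, optionally
        sharpened with temperature `T`.
    confident : torch.BoolTensor
        [Batch,] mask marking examples whose highest pseudolabel
        probability is >= `pseudolabel_min_confidence`.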
- """ - # let the teacher model take guesses at the label for augmented - # versions of the unlabeled observations - raw_guesses = [] - for i in range(self.n_augmentations): - to_augment = { - "input": unlabeled_sample["input"].clone(), - "output": torch.zeros(1), - } - if self.augment_pseudolabels: - # augment the batch before pseudolabeling - augmented_batch = self.augment(to_augment) - else: - augmented_batch = to_augment - # convert model guess to probability distribution `q` - # with softmax, prior to considering it a label - guess = F.softmax( - self.teacher(augmented_batch["input"]), - dim=1, - ) - raw_guesses.append(guess) - - # compute pseudolabels as the mean across all label guesses - pseudolabels = torch.mean( - torch.stack( - raw_guesses, - dim=0, - ), - dim=0, - ) - - # before sharpening labels, determine if the labels are - # sufficiently confidence to use - highest_conf, likeliest_class = torch.max( - pseudolabels, - dim=1, - ) - # confident is a bool that we will use to decide if we should - # keep loss from a given example or zero it out - confident = highest_conf >= self.pseudolabel_min_confidence - # store confidence outcomes in a running list so we can monitor - # which fraction of pseudolabels are being used - if len(self.running_confidence_scores) > self.n_batches_to_store: - # remove the oldest batch - self.running_confidence_scores.pop(0) - - # store tuples of (torch.Tensor, torch.Tensor) - # (confident_bool, highest_conf_score) - self.running_confidence_scores.append( - ( - confident.detach().cpu(), - highest_conf.detach().cpu(), - ), - ) - - if self.T is not None: - # sharpen labels - pseudolabels = sharpen_labels( - q=pseudolabels, - T=self.T, - ) - # ensure pseudolabels aren't attached to the - # computation graph - pseudolabels = pseudolabels.detach() - - return pseudolabels, confident - - def __call__( - self, - model: nn.Module, - labeled_sample: dict, - unlabeled_sample: dict, - **kwargs, - ) -> Tuple[torch.FloatTensor, torch.FloatTensor]: - """ - Parameters - ---------- - model : nn.Module - model with parameters accessible via the `.parameters()` - method. - labeled_sample : dict - input - torch.FloatTensor - [Batch, Features] minibatch of labeled examples. - output - torch.LongTensor - one-hot labels. - unlabeled_sample : dict - input - torch.FloatTensor - [Batch, Features] minibatch of unlabeled samples. - output - torch.LongTensor - zeros. - - - Returns - ------- - supervised_loss : torch.FloatTensor - supervised loss computed using `sup_criterion` between - model predictions on mixed observations and true labels. - unsupervised_loss : torch.FloatTensor - unsupervised loss computed using `criterion` between - model predictions on mixed unlabeled observations - and pseudolabels generated as the mean - across `n_augmentations` augmentation runs. - supervised_outputs : torch.FloatTensor - [Batch, Classes] model outputs for augmented labeled examples. 
- """ - - ######################################## - # (0) Update the mean teacher - ######################################## - - self._update_teacher( - model, - ) - - ######################################## - # (1) Generate labels for unlabeled data - ######################################## - - pseudolabels, pseudolabel_confidence = self._generate_labels( - unlabeled_sample=unlabeled_sample, - ) - # make sure pseudolabels match real label dtype - # so that they can be concatenated - pseudolabels = pseudolabels.to(dtype=labeled_sample["output"].dtype) - - ######################################## - # (2) Augment the labeled data - ######################################## - - labeled_sample = self.augment( - labeled_sample, - ) - - ######################################## - # (3) Perform MixUp across both batches - ######################################## - n_unlabeled_original = unlabeled_sample["input"].size(0) - unlabeled_sample["output"] = pseudolabels - - # separate samples into confident and unconfident sample dicts - # we only allow samples with confident pseudolabels to - # participate in the MixUp operation - conf_unlabeled_sample = {} - ucnf_unlabeled_sample = {} - - for k in unlabeled_sample.keys(): - conf_unlabeled_sample[k] = unlabeled_sample[k][pseudolabel_confidence] - ucnf_unlabeled_sample[k] = unlabeled_sample[k][~pseudolabel_confidence] - - # unlabeled samples come BEFORE labeled samples - # in the concatenated sample - # NOTE: we only allow confident unlabeled samples - # into the concatenated sample used for MixUp - cat_sample = { - k: torch.cat( - [ - conf_unlabeled_sample[k], - labeled_sample[k], - ], - dim=0, - ) - for k in ["input", "output"] - } - - # mixup the concatenated sample - # NOTE: dominant observations are maintained - # by passing `keep_dominant_obs=True` in - # `self.__init__` - mixed_samples = self.mixup_op( - cat_sample, - ) - - ######################################## - # (4) Forward pass for mixed samples - ######################################## - - # split the mixed samples based on the dominant - # observation - n_unlabeled = conf_unlabeled_sample["input"].size(0) - unlabeled_m_ = mixed_samples["input"][:n_unlabeled] - unlabeled_y_ = mixed_samples["output"][:n_unlabeled] - - labeled_m_ = mixed_samples["input"][n_unlabeled:] - labeled_y_ = mixed_samples["output"][n_unlabeled:] - - # append low confidence samples to unlabeled_m_ and unlabeled_y_ - # this ensures that batch norm is still able to update it's - # statistics based on batches from the train AND target domain - unlabeled_m_ = torch.cat( - [ - unlabeled_m_, - ucnf_unlabeled_sample["input"], - ] - ) - unlabeled_y_ = torch.cat( - [ - unlabeled_y_, - ucnf_unlabeled_sample["output"], - ] - ) - - # perform a forward pass on mixed samples - # NOTE: Our unsupervised criterion operates on post-softmax - # probability vectors, so we transform the output here - unlabeled_z_ = F.softmax( - model(unlabeled_m_), - dim=1, - ) - # NOTE: Our supervised criterion operates directly on - # logits and performs a `logsoftmax()` internally - labeled_z_ = model(labeled_m_) - - ######################################## - # (5) Compute losses - ######################################## - - # compare mixed pseudolabels to the model guess - # on the mixed input - # NOTE: this returns an **unreduced** loss of size - # [Batch,] or [Batch, Classes] depending on the loss function - unsupervised_loss = self.unsup_criterion( - unlabeled_z_, - unlabeled_y_, - ) - # sum loss across classes if not reduced in the loss - if 
unsupervised_loss.dim() > 1: - unsupervised_loss = torch.sum(unsupervised_loss, dim=1) - - # scale the loss to 0 for all observations without confident pseudolabels - # this allows the loss to slowly ramp up as labels become more confident - scale_vec = ( - torch.zeros_like(unsupervised_loss) - .float() - .to(device=unsupervised_loss.device) - ) - scale_vec[:n_unlabeled] += 1.0 - unsupervised_loss = unsupervised_loss * scale_vec - unsupervised_loss = torch.mean(unsupervised_loss) - - # compute model guess on the mixed supervised input - # to the mixed labels - # NOTE: we didn't allow non-confident pseudolabels - # into the MixUp, so this shouldn't propogate any - # poor quality pseudolabel information - supervised_loss = self.sup_criterion( - labeled_z_, - labeled_y_, - ) - - self.step += 1 - - return supervised_loss, unsupervised_loss, labeled_z_ - - -class MultiTaskMixMatchWrapper(nn.Module): - def __init__( - self, - mixmatch_loss: MixMatchLoss, - sup_weight: Union[float, Callable] = 1.0, - unsup_weight: Union[float, Callable] = 1.0, - use_sup_eval: bool = True, - ) -> None: - """Wrapper around the `MixMatchLoss` class for use with `MultiTaskTrainer`. - The wrapper performs weighting of the supervised and unsupervised loss - internally, then returns a single `torch.FloatTensor` to `MultiTaskTrainer` - to maintain a consistent "one criterion, one loss" API. - - Parameters - ---------- - mixmatch_loss : MixMatchLoss - an instance of the `MixMatchLoss` class. - sup_weight : float, Callable - constant weight or callable weight schedule function for the - supervised MixMatch loss. - unsup_weight : float, Callable - constant weight or callable weight schedule function for the - unsupervised MixMatch loss. - use_sup_eval : bool - use only the supervised loss when in eval mode. - - Returns - ------- - None. - - Notes - ----- - Relies upon updating the `.epoch` attribute during the training - loop to properly enforce weight scheduling. - """ - super(MultiTaskMixMatchWrapper, self).__init__() - self.mixmatch_loss = mixmatch_loss - self.sup_weight = sup_weight - self.unsup_weight = unsup_weight - self.use_sup_eval = use_sup_eval - # initialize the epoch attribute so `MultiTaskTrainer` can find it - # `.epoch` will be updated in the training loop - self.epoch = 0 - return - - def __call__( - self, - *, - labeled_sample: dict, - unlabeled_sample: dict, - model: nn.Module, - weight: float = None, - ) -> torch.FloatTensor: - """Compute MixMatch losses, weight them internally, then return - the weighted sum. - - Parameters - ---------- - labeled_sample : dict - input - torch.FloatTensor - [Batch, Features] minibatch of labeled examples. - output - torch.LongTensor - one-hot labels. - unlabeled_sample : dict - input - torch.FloatTensor - [Batch, Features] minibatch of unlabeled samples. - output - torch.LongTensor - zeros. - model : nn.Module - model with parameters accessible via the `.parameters()` - method. - weight : float - unused weight parameter for compatability with the `MultiTaskTrainer` - API. - - Returns - ------- - loss : torch.FloatTensor - weighted sum of MixMatch supervised and unsupervised loss. 
- """ - sup_loss, unsup_loss, labeled_z_ = self.mixmatch_loss( - labeled_sample=labeled_sample, - unlabeled_sample=unlabeled_sample, - model=model, - ) - # get weights for each loss by either calling the function or keeping - # the constant value provided - sup_weight = ( - self.sup_weight(self.epoch) - if callable(self.sup_weight) - else self.sup_weight - ) - unsup_weight = ( - self.unsup_weight(self.epoch) - if callable(self.unsup_weight) - else self.unsup_weight - ) - - # don't use the unsupervised loss if we're in eval mode - # `use_sup_eval` is set - if self.use_sup_eval and not self.training: - unsup_weight = 0.0 - - loss = (sup_weight * sup_loss) + (unsup_weight * unsup_loss) - return loss - - -"""Domain adaptation losses""" - - -class DANLoss(nn.Module): - """Compute a domain adaptation network (DAN) loss.""" - - def __init__( - self, - dan_criterion: Callable, - model: CellTypeCLF, - use_conf_pseudolabels: bool = False, - scale_loss_pseudoconf: bool = False, - n_domains: int = 2, - **kwargs, - ) -> None: - """Compute a domain adaptation network loss. - - Parameters - ---------- - dan_criterion : Callable - domain classification criterion `Callable(output, target)`. - model : scnym.model.CellTypeCLF - `CellTypeCLF` model to use for embedding. - use_conf_pseudolabels : bool - only use unlabeled observations with confident pseudolabels - for discrimination. expects `pseudolabel_confidence` to be - passed in the `__call__()` if so. - scale_loss_pseudoconf : bool - scale the weight of the gradients passed to both models based - on the proportion of confident pseudolabels. - n_domains : int - number of domains of origin to predict using the adversary. - - Returns - ------- - None. - - Notes - ----- - **kwargs are passed to `scnym.model.DANN` - - See Also - -------- - scnym.model.DANN - scnym.trainer.MultiTaskTrainer - """ - super(DANLoss, self).__init__() - - self.dan_criterion = dan_criterion - - # build the DANN - self.dann = DANN( - model=model, - n_domains=n_domains, - **kwargs, - ) - self.dann.domain_clf = self.dann.domain_clf.to( - device=next(iter(model.parameters())).device, - ) - # instantiate with small tensor to simplify downstream size - # checking logic - self.x_embed = torch.zeros((1, 1)) - - self.use_conf_pseudolabels = use_conf_pseudolabels - self.scale_loss_pseudoconf = scale_loss_pseudoconf - # note that weighting is performed on gradients internally; - # accessed by `trainer.MultiTaskTrainer` - self.no_weight = True - return - - def __call__( - self, - labeled_sample: dict, - unlabeled_sample: dict = None, - weight: float = 1.0, - pseudolabel_confidence: torch.Tensor = None, - **kwargs, - ) -> torch.FloatTensor: - """Compute the domain adaptation loss on a labeled source - and unlabeled target domain batch. - - Parameters - ---------- - labeled_sample : dict - input - torch.FloatTensor - [BatchL, Features] minibatch of labeled examples. - output - torch.LongTensor - one-hot labels. - unlabeled_sample : dict - input - torch.FloatTensor - [BatchU, Features] minibatch of unlabeled samples. - output - torch.LongTensor - zeros. - weight : float - weight for reversed gradients passed up to the embedding - layer. gradients used for the domain classifier are normal - gradients, but we weight and reverse the gradients flowing - upward to the embedding layer by this constant. - pseudolabel_confidence : torch.Tensor - [BatchU,] boolean identifying observations in `unlabeled_sample` - with confident pseudolabels. 
- if not None and `self.use_conf_pseudolabels`, only performs - domain discrimination on unlabeled samples with confident - pseudolabels. - **kwargs : dict - kwargs are a no-op, included to allow for `model` kwarg per - `MultiTaskTrainer` API. - - Returns - ------- - dan_loss : torch.FloatTensor - domain adversarial loss term. - """ - # if no unlabeled data is provided, we create a dict of empty - # tensors. these tensors lead to no-ops for all the `.cat` ops - # below. - if unlabeled_sample is None: - t = torch.FloatTensor().to(device=labeled_sample["input"].device) - unlabeled_sample = {k: t for k in ["input", "domain"]} - - ######################################## - # (1) Create domain labels - ######################################## - - # check if domain labels are provided, if not assume - # train and target are separate domains - # domain labels of -1 indicate `None` was passed as a domain label - # to `SingleCellDS` - if torch.sum(labeled_sample.get("domain", torch.Tensor([-1])) == -1) > 0: - source_label = torch.zeros(labeled_sample["input"].size(0)).long() - source_label = torch.nn.functional.one_hot( - source_label, - num_classes=2, - ) - logger.debug("DAN source domain labels inferred.") - else: - # domain labels should already by one-hot - source_label = labeled_sample["domain"] - source_label = source_label.to(device=labeled_sample["input"].device) - - if torch.sum(unlabeled_sample.get("domain", torch.Tensor([-1])) == -1) > 0: - target_label = torch.ones(unlabeled_sample["input"].size(0)).long() - target_label = torch.nn.functional.one_hot( - target_label, - num_classes=2, - ) - logger.debug("DAN target domain labels inferred.") - else: - target_label = unlabeled_sample["domain"] - target_label = target_label.to(device=unlabeled_sample["input"].device) - - lx = labeled_sample["input"] - ux = unlabeled_sample["input"] - - ######################################## - # (2) Check confidence of unlabeled obs - ######################################## - - if self.use_conf_pseudolabels and pseudolabel_confidence is not None: - # check confidence of unlabeled observations and remove - # any unconfident observations from the minibatch - ux = ux[pseudolabel_confidence] - target_label = target_label[pseudolabel_confidence] - # store the number of confident unlabeled obs - self.n_conf_pseudolabels = ux.size(0) - self.n_total_unlabeled = unlabeled_sample["input"].size(0) - p_conf_pseudolabels = self.n_conf_pseudolabels / max(self.n_total_unlabeled, 1) - - ######################################## - # (3) Embed points and Classify domains - ######################################## - - x = torch.cat([lx, ux], 0) - dlabel = torch.cat([source_label, target_label], 0) - - self.dann.set_rev_grad_weight(weight=weight) - domain_pred, x_embed = self.dann(x) - - # store embeddings and labels - if x_embed.size(0) >= self.x_embed.size(0): - self.x_embed = copy.copy(x_embed.detach().cpu()) - self.dlabel = copy.copy(dlabel.detach().cpu()) - - ######################################## - # (4) Compute DAN loss - ######################################## - - dan_loss = self.dan_criterion( - domain_pred, - dlabel, - ) - - ######################################## - # (5) Compute DAN accuracy for logs - ######################################## - - _, dan_pred = torch.max(domain_pred, dim=1) - _, dlabel_int = torch.max(dlabel, dim=1) - self.dan_acc = ( - torch.sum( - dan_pred == dlabel_int, - ) - / float(dan_pred.size(0)) - ) - - if self.scale_loss_pseudoconf: - dan_loss *= p_conf_pseudolabels - - return dan_loss 
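# --- Illustrative sketch, not part of scNym ---------------------------------
# The gradient reversal behavior described in `DANLoss` above can be
# implemented with a small autograd Function: the forward pass is the
# identity, and the backward pass flips and scales the gradient flowing
# back toward the embedding. scNym's own implementation lives in
# `scnym.model.DANN`; this minimal sketch only shows the mechanism.


class _ReverseGradSketch(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, weight: float):
        # identity on the forward pass; remember the scaling weight
        ctx.weight = weight
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # reverse and scale gradients headed for the embedding layer
        return -ctx.weight * grad_output, None


# Usage sketch: `embedder`, `domain_clf`, and `dan_criterion` are
# hypothetical stand-ins for the model trunk, the domain classifier head,
# and a domain classification loss.
#
#   x_embed = embedder(x)
#   domain_pred = domain_clf(_ReverseGradSketch.apply(x_embed, weight))
#   dan_loss = dan_criterion(domain_pred, domain_labels)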
- - -"""Reconstruction losses""" - - -def poisson_loss( - input_: torch.FloatTensor, - target: torch.FloatTensor, - dispersion: torch.FloatTensor = None, -) -> torch.FloatTensor: - """Compute a Poisson loss for count data. - - Parameters - ---------- - input_ : torch.FloatTensor - [Batch, Feature] Poisson rate parameters. - target : torch.FloatTensor - [Batch, Features] count based target. - dispersion : torch.FloatTensor - Ignored for Poisson loss. - - Returns - ------- - nll : torch.FloatTensor - Poisson negative log-likelihood. - """ - # input_ are Poisson rates, compute likelihood of target data - # and sum likelihood across genes - nll = -1 * torch.sum( - torch.distributions.Poisson(input_).log_prob(target), - dim=-1, - ) - return nll - - -def negative_binomial_loss( - input_: torch.FloatTensor, - target: torch.FloatTensor, - dispersion: torch.FloatTensor, - eps: float = 1e-8, -) -> torch.FloatTensor: - """Compute a Negative Binomial loss for count data. - - Parameters - ---------- - input_ : torch.FloatTensor - [Batch, Feature] Negative Binomial mean parameters. - target : torch.FloatTensor - [Batch, Features] count based target. - dispersion : torch.FloatTensor - [Features,] Negative Binomial dispersion parameters. - eps : float - small constant to avoid numerical issues. - - Returns - ------- - nll : torch.FloatTensor - Negative Binomial negative log-likelihood. - - References - ---------- - Credit to `scvi-tools`: - https://github.com/YosefLab/scvi-tools/blob/42315756ba879b9421630696ea7afcd74e012a07/scvi/distributions/_negative_binomial.py#L67 - """ - res = -1 * (NegativeBinomial(mu=input_, theta=dispersion).log_prob(target).sum(-1)) - return res - - -def mse_loss( - input_: torch.FloatTensor, - target: torch.FloatTensor, - dispersion: torch.FloatTensor, -) -> torch.FloatTensor: - """MSELoss wrapped for scNym compatibility""" - return torch.nn.functional.mse_loss(input_, target) - - -class ReconstructionLoss(nn.Module): - """Computes a reconstruction of the input data from the - embedding""" - - def __init__( - self, - *, - model: nn.Module, - rec_criterion: Callable, - reduction: str = "mean", - norm_before_loss: float = None, - **kwargs, - ) -> None: - """Computes a reconstruction loss of the input data - from the embedding. - - Parameters - ---------- - model : nn.Module - cell type classification model to use for cellular - embedding. - rec_criterion : Callable - reconstruction loss that takes two arguments `(input_, target)`. - reduction : str - {"none", "mean", "sum"} reduction operation for [Batch,] loss values. - norm_before_loss : float - normalize profiles to the following depth before computing loss. - this helps balance loss contribution from cells with dramatically - different depths (e.g. Drop-seq and Smart-seq2). - if `None`, does not normalize before loss. - **kwargs : dict - passed to recontruction model `.model.AE`. - - Returns - ------- - None. - """ - super(ReconstructionLoss, self).__init__() - - self.rec_criterion = rec_criterion - self.model = model - self.reduction = reduction - if reduction not in (None, "none", "sum", "mean"): - msg = f"reduction argument {self.reduction} is invalid." 
- raise ValueError(msg) - self.norm_before_loss = norm_before_loss - - # build the reconstruction autoencoder - self.rec_model = AE( - model=model, - **kwargs, - ) - # move rec_model to the appropriate computing device - self.rec_model = self.rec_model.to( - device=list(self.model.parameters())[1].device, - ) - - return - - def __call__( - self, - labeled_sample: dict, - unlabeled_sample: dict = None, - weight: float = 1.0, - **kwargs, - ) -> torch.FloatTensor: - """Compute the domain adaptation loss on a labeled source - and unlabeled target domain batch. - - Parameters - ---------- - labeled_sample : dict - input - torch.FloatTensor - [BatchL, Features] minibatch of labeled examples. - output - torch.LongTensor - [BatchL,] one-hot labels. - embed - torch.FloatTensor, optional - [BatchL, n_hidden] minibatch embedding. - unlabeled_sample : dict, optional. - input - torch.FloatTensor - [BatchU, Features] minibatch of unlabeled samples. - output - torch.LongTensor - [BatchU,] zeros. - embed - torch.FloatTensor, optional - [BatchU, n_hidden] minibatch embedding. - weight : float - reconstruction loss weight. Not used, present for compatability with the - `MultiTaskTrainer` API. - kwargs : dict - currently not used, allows for compatibility with `Trainer` subclasses - that pass `model` to call by default (e.g. as used for the old `MixMatchLoss`). - - Returns - ------- - reconstruction_loss : torch.FloatTensor - reconstruction loss, reduced across the batch. - """ - if unlabeled_sample is None: - # if no unlabeled data is passed, we create empty FloatTensors - # to concat onto the labeled tensors below. - # cat of an empty tensor is a no-op. - t = torch.FloatTensor().to(device=labeled_sample["input"].device) - unlabeled_sample = { - "input": t, - "embed": t, - "domain": t, - } - - # join data into a single batch - x = torch.cat( - [ - labeled_sample["input"], - unlabeled_sample["input"], - ], - dim=0, - ) - - # use pre-computed embeddings if they're available from e.g. - # a previous loss function. - if "embed" in labeled_sample.keys() and "embed" in unlabeled_sample.keys(): - x_embed = torch.cat( - [ - labeled_sample["embed"], - unlabeled_sample["embed"], - ], - dim=0, - ) - else: - x_embed = None - - # pass domain arguments to the reconstruction model if specified - # domains are already [Batch, Domains] one-hot encoded. 
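        # (Editor's note, illustrative values only: a batch of integer domain
        #  ids such as d = torch.tensor([0, 2, 1]) with three domains would
        #  arrive here already encoded as
        #  torch.nn.functional.one_hot(d, num_classes=3).float(), i.e.
        #  [[1, 0, 0], [0, 0, 1], [0, 1, 0]], so the labeled and unlabeled
        #  domain tensors can simply be concatenated row-wise below.)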
-        if self.rec_model.n_domains > 0:
-            x_domain = torch.cat(
-                [
-                    labeled_sample["domain"],
-                    unlabeled_sample["domain"],
-                ],
-                dim=0,
-            ).to(device=x.device)
-        else:
-            x_domain = None
-
-        # perform embedding and reconstruction
-        # if `x_embed is None`, computes the embedding using the
-        # trunk of the classification model
-        x_rec, x_scaled, dispersion, x_embed = self.rec_model(
-            x,
-            x_embed=x_embed,
-            x_domain=x_domain,
-        )
-
-        if self.norm_before_loss is not None:
-            # normalize profiles to a common depth before computing the loss
-            x_scaled2use = x_scaled / x_scaled.sum(1).view(-1, 1) * 1e6
-            x2use = x / x.sum(1).view(-1, 1) * self.norm_before_loss
-        else:
-            x_scaled2use = x_scaled
-            x2use = x
-
-        # score reconstruction
-        reconstruction_loss = self.rec_criterion(
-            input_=x_scaled2use,
-            target=x2use,
-            dispersion=dispersion,
-        )
-        if self.reduction == "mean":
-            reconstruction_loss = torch.mean(reconstruction_loss)
-        elif (self.reduction == "none") or (self.reduction is None):
-            reconstruction_loss = reconstruction_loss
-        elif self.reduction == "sum":
-            reconstruction_loss = torch.sum(reconstruction_loss)
-        else:
-            msg = f"reduction argument {self.reduction} is invalid."
-            raise ValueError(msg)
-
-        return reconstruction_loss
-
-
-class LatentL2(nn.Module):
-    def __init__(
-        self,
-    ) -> None:
-        """Compute an l2-norm penalty on the latent embedding.
-        This serves as a sufficient regularization in deterministic
-        regularized autoencoders (RAE), akin to the KL term in VAEs.
-
-        References
-        ----------
-        https://openreview.net/pdf?id=S1g7tpEYDS
-        """
-        super(LatentL2, self).__init__()
-
-        return
-
-    def __call__(
-        self,
-        labeled_sample: dict,
-        unlabeled_sample: dict,
-        model: nn.Module = None,
-        weight: float = None,
-    ) -> torch.FloatTensor:
-        """Compute an l2 penalty on the latent space of a model"""
-        # is the embedding pre-computed for both samples?
-        embed_computed = "embed" in labeled_sample.keys()
-        if unlabeled_sample is not None:
-            embed_computed = embed_computed and ("embed" in unlabeled_sample.keys())
-        keys = ["input"]
-        if embed_computed:
-            keys += ["embed"]
-
-        if unlabeled_sample is not None:
-            # join tensors across samples
-            sample = {
-                k: torch.cat([labeled_sample[k], unlabeled_sample[k]], 0) for k in keys
-            }
-        else:
-            sample = labeled_sample
-
-        if embed_computed:
-            x_embed = sample["embed"]
-        else:
-            data = sample["input"]
-            logits, x_embed = model(data, return_embed=True)
-
-        l2 = 0.5 * torch.norm(x_embed, p=2)
-        return l2
-
-
-# TODO: Consider adding in one of the TC-VAE mutual information
-# penalties for latent vars to substitute for the covariance penalty
-# inherent in the mean field VAE KL term
-
-
-class UnsupervisedLosses(object):
-    """Compute multiple unsupervised loss functions"""
-
-    def __init__(
-        self,
-        losses: list,
-        weights: list = None,
-    ) -> None:
-        """Compute multiple unsupervised loss functions.
-
-        Parameters
-        ----------
-        losses : List[Callable]
-            each element in list is a Callable that takes arguments
-            `labeled_sample, unlabeled_sample` and returns a `torch.FloatTensor`
-            differentiable loss suitable for backprop.
-            methods can also take or ignore a `weight` argument.
-        weights : List[Callable]
-            matching weight functions for each loss that take an input int epoch
-            and return a float loss weight.
-
-        Returns
-        -------
-        None.
-
-        Notes
-        -----
-        Computes each loss in serial.
-
-        """
-        self.losses = losses
-        # if no weights are provided, use a uniform schedule with
-        # weight `1.` for each loss function.
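        # (Editor's note, hypothetical schedule for illustration: a valid entry
        #  in `weights` is any Callable[[int], float], e.g.
        #      lambda epoch: min(1.0, epoch / 100.0)
        #  to ramp a loss weight linearly over the first 100 epochs. The
        #  default below assigns a constant weight of 1.0 to every loss.)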
- self.weights = weights if weights is not None else [lambda x: 1.0] * len(losses) - return - - def __call__( - self, - labeled_sample: dict, - unlabeled_sample: dict, - ) -> torch.FloatTensor: - loss = torch.zeros( - 1, - ) - for i, fxn in enumerate(self.losses): - fxn_loss = fxn( - labeled_sample=labeled_sample, - unlabeled_sample=unlabeled_sample, - weight=self.weights[i], - ) - loss += fxn_loss - return loss - - -"""Loss weight scheduling""" - - -class ICLWeight(object): - def __init__( - self, - ramp_epochs: int, - burn_in_epochs: int = 0, - max_unsup_weight: float = 10.0, - sigmoid: bool = False, - ) -> None: - """Schedules the interpolation consistency loss - weights across a set of epochs. - - Parameters - ---------- - ramp_epochs : int - number of epochs to increase the unsupervised - loss weight until reaching a maximum value. - burn_in_epochs : int - epochs to wait before increasing the unsupervised loss. - max_unsup_weight : float - maximum weight for the unsupervised loss component. - sigmoid : bool - scale weight using a sigmoid function. - - Returns - ------- - None. - """ - self.ramp_epochs = ramp_epochs - self.burn_in_epochs = burn_in_epochs - self.max_unsup_weight = max_unsup_weight - self.sigmoid = sigmoid - # don't allow division by zero, set step size manually - if self.ramp_epochs == 0.0: - self.step_size = self.max_unsup_weight - else: - self.step_size = self.max_unsup_weight / self.ramp_epochs - print( - "Scaling ICL over %d epochs, %d epochs for burn in." - % (self.ramp_epochs, self.burn_in_epochs) - ) - return - - def _get_weight( - self, - epoch: int, - ) -> float: - """Compute the current weight""" - if epoch >= (self.ramp_epochs + self.burn_in_epochs): - weight = self.max_unsup_weight - elif self.sigmoid: - x = (epoch - self.burn_in_epochs) / self.ramp_epochs - coef = np.exp(-5 * (x - 1) ** 2) - weight = coef * self.max_unsup_weight - else: - weight = self.step_size * (epoch - self.burn_in_epochs) - - return weight - - def __call__( - self, - epoch: int, - ) -> float: - """Compute the weight for an unsupervised IC loss - given the epoch. - - Parameters - ---------- - epoch : int - current training epoch. - - Returns - ------- - weight : float - weight for the unsupervised component of IC loss. - """ - if type(epoch) != int: - raise TypeError(f"epoch must be int, you passed a {type(epoch)}") - if epoch < self.burn_in_epochs: - weight = 0.0 - else: - weight = self._get_weight(epoch) - return weight - - -"""Structured latent variable learning""" - - -class StructuredSparsity(object): - def __init__( - self, - n_genes: int, - n_hidden: int, - gene_sets: dict = None, - gene_names: Iterable = None, - prior_matrix: Union[np.ndarray, torch.Tensor] = None, - n_dense_latent: int = 0, - group_lasso: float = 0.0, - p_norm: int = 1, - nonnegative: bool = False, - ) -> None: - """Add structured sparsity penalties to regularize - weights of an encoding layer. - - Parameters - ---------- - n_genes : int - number of genes in the input layer. - n_hidden : int - number of hidden units in the input layer. - gene_sets : dict, optional. - keys are program names, values are lists of gene names. - must have fewer keys than `n_hidden`. - gene_names : Iterable, optional. - names for genes in `n_genes`. required for use of `gene_sets`. - prior_matrix : np.ndarray, torch.FloatTensor - [n_hidden, n_genes] binary matrix of prior constraints. - if provided with `gene_sets`, this matrix is used instead. - n_dense_latent : int - number of latent variables with no l1 loss applied. 
- applies to the final `n_dense_latent` variables. - group_lasso : float, optional. - weight for a group LASSO penalty on the second hidden - layer. [Default = 0]. - p_norm : int - p-norm to use for the prior penalty. [Default = 1] for lasso. - nonnegative : bool - apply an L1 penalty to *all* negative values. this implicitly enforces - a roughly non-negative projection matrix. - - Returns - ------- - None. - """ - self.n_genes = n_genes - self.n_hidden = n_hidden - self.gene_sets = gene_sets - self.gene_names = gene_names - self.prior_matrix = None - self.n_dense_latent = n_dense_latent - self.group_lasso = group_lasso - self.p_norm = p_norm - self.nonnegative = nonnegative - - if prior_matrix is None and gene_sets is None: - msg = "Must provide either a prior_matrix or gene_sets to use." - raise ValueError(msg) - - if gene_sets is not None and gene_names is None: - msg = "Must provide `gene_names` to use `gene_sets`." - raise ValueError(msg) - - if gene_sets is not None and gene_names is not None: - - if len(gene_sets.keys()) > self.n_hidden: - # check that we didn't provide too many gene sets - # given the size of our encoder - msg = f"{len(gene_sets.keys())} gene sets provided,\n" - msg += f"but there are only {n_hidden} hidden units.\n" - msg += "Must specify fewer programs than hidden units." - raise ValueError(msg) - - # set `self.prior_matrix` based on the gene sets - # also sets `self.gene_set_names` - self._set_prior_matrix_from_gene_sets() - - if prior_matrix is not None: - # if the prior_matrix was provided, always prefer it. - self.prior_matrix = prior_matrix - - assert self.prior_matrix is not None - return - - def _set_prior_matrix_from_gene_sets( - self, - ) -> None: - """Generate a prior matrix from a set of gene programs - and gene names for the input variables. 
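        Examples
        --------
        A hypothetical configuration; gene and program names are illustrative,
        not drawn from scNym:

        >>> reg = StructuredSparsity(
        ...     n_genes=3,
        ...     n_hidden=2,
        ...     gene_sets={"prog1": ["A", "C"]},
        ...     gene_names=["A", "B", "C"],
        ... )
        >>> reg.prior_matrix.shape
        torch.Size([2, 3])

        Row 0 corresponds to the annotated "prog1" program, whose member genes
        are exempt from the l1 penalty; row 1 has no annotated genes and is
        therefore fully penalized unless it falls within `n_dense_latent`.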
- """ - self.gene_set_names = sorted(list(self.gene_sets.keys())) - - # [n_programs, n_genes] - P = torch.zeros( - ( - self.n_hidden, - self.n_genes, - ) - ).bool() - - # cast to set for list comprehension speed - gene_names = set(self.gene_names) - for i, k in enumerate(self.gene_set_names): - genes = self.gene_sets[k] - bidx = torch.tensor( - [x in genes for x in gene_names], - dtype=torch.bool, - ) - P[i, :] = bidx - - self.prior_matrix = P - return - - def __call__( - self, - model: nn.Module, - **kwargs, - ) -> torch.FloatTensor: - """Compute the l1 sparsity loss.""" - # get first layer weights - W = dict(model.named_parameters())["embed.0.weight"] - logger.debug(f"Weights {W}, sum: {W.sum()}") - # generate a "penalty" matrix `P` that we'll modify - # before computing the l1 - # this elem-mult zeros out the loss on any annotated - # genes in each gene program - P = W * torch.logical_not(self.prior_matrix).float().to(device=W.device) - logger.debug(f"Penalty {P}, sum {P.sum()}") - # omit the dense latent factors (if any) from the l1 - # computation - n_latent = P.size(0) - self.n_dense_latent - prior_norm = torch.norm(P[:n_latent], p=self.p_norm) - logger.debug(f"l1 {prior_norm}") - - # W1 = dict(model.named_parameters())['embed.4.weight'] - # group_l1 = torch.norm(W1, p=1) - - if self.nonnegative: - # place an optional non-negativity penalty on genes within the gene set - nonneg_inset = W * self.prior_matrix.float().to(device=W.device) - nonneg_norm = torch.norm(nonneg_inset[nonneg_inset < 0], p=self.p_norm) - else: - nonneg_norm = 0.0 - - r = prior_norm + nonneg_norm - return r diff --git a/build/lib/scnym/main.py b/build/lib/scnym/main.py deleted file mode 100644 index 59d84ee..0000000 --- a/build/lib/scnym/main.py +++ /dev/null @@ -1,1678 +0,0 @@ -"""Train scNym models and identify cell type markers""" -import numpy as np -import pandas as pd -from scipy import sparse -import os -import os.path as osp -import scanpy as sc -import logging - -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.utils.data import DataLoader -from sklearn.model_selection import StratifiedKFold -from typing import Union, Tuple -import copy -import itertools -from functools import partial - -from .model import CellTypeCLF - -from .dataprep import SingleCellDS, SampleMixUp, balance_classes -from .dataprep import AUGMENTATION_SCHEMES -from .trainer import Trainer, SemiSupervisedTrainer, MultiTaskTrainer -from .trainer import cross_entropy, get_class_weight -from .trainer import InterpolationConsistencyLoss, ICLWeight, MixMatchLoss, DANLoss -from .losses import scNymCrossEntropy -from .predict import Predicter -from . 
import utils - -# allow tensorboard outputs even though TF2 is installed -# TF2 broke the tensorboard/pytorch API, so we need to alias -# the old API endpoint below -try: - import tensorflow as tf - tfv = int(tf.__version__.split(".")[0]) -except ImportError: - print("tensorflow is not installed, assuming tensorboard is independent") - tfv = 1 - -if tfv > 1: - import tensorboard as tb - - tf.io.gfile = tb.compat.tensorflow_stub.io.gfile - - -logger = logging.getLogger(__name__) - -# define optimizer map for cli selection -OPTIMIZERS = { - "adadelta": torch.optim.Adadelta, - "adam": torch.optim.Adam, - "adamw": torch.optim.AdamW, - "sgd": torch.optim.SGD, -} - -######################################################### -# Train scNym classification models -######################################################### - - -def repeater(data_loader): - """Use `itertools.repeat` to infinitely loop through - a dataloader. - - Parameters - ---------- - data_loader : torch.utils.data.DataLoader - data loader class. - - Yields - ------ - data : Iterable - batches from `data_loader`. - - Credit - ------ - https://bit.ly/2z0LGm8 - """ - for loader in itertools.repeat(data_loader): - for data in loader: - yield data - - -def fit_model( - X: Union[np.ndarray, sparse.csr.csr_matrix], - y: np.ndarray, - traintest_idx: Union[np.ndarray, tuple], - val_idx: np.ndarray, - batch_size: int, - n_epochs: int, - lr: float, - optimizer_name: str, - weight_decay: float, - ModelClass: nn.Module, - out_path: str, - n_genes: int = None, - mixup_alpha: float = None, - unlabeled_counts: np.ndarray = None, - unsup_max_weight: float = 2.0, - unsup_mean_teacher: bool = False, - ssl_method: str = "mixmatch", - ssl_kwargs: dict = {}, - weighted_classes: bool = False, - balanced_classes: bool = False, - input_domain: np.ndarray = None, - unlabeled_domain: np.ndarray = None, - pretrained: str = None, - patience: int = None, - save_freq: int = None, - tensorboard: bool = True, - **kwargs, -) -> Tuple[float, float]: - """Fit an scNym model given a set of observations and labels. - - Parameters - ---------- - X : np.ndarray - [Cells, Genes] of log1p transformed normalized values. - log1p and normalization performed using scanpy defaults. - y : np.ndarray - [Cells,] integer class labels. - traintest_idx : np.ndarray - [Int,] indices to use for training and early stopping. - a single array will be randomly partitioned, OR a tuple - of `(train_idx, test_idx)` can be passed. - val_idx : np.ndarray - [Int,] indices to hold-out for final model evaluation. - n_epochs : int - number of epochs for training. - lr : float - learning rate. - optimizer_name : str - optimizer to use. {"adadelta", "adam"}. - weight_decay : float - weight decay to apply to model weights. - ModelClass : nn.Module - a model class for construction classification models. - batch_size : int - batch size for training. - fold_indices : list - elements are 2-tuple, with training indices and held-out. - out_path : str - top level path for saving fold outputs. - n_genes : int - number of genes in the input. Not necessarily `X.shape[1]` if - the input matrix has been concatenated with other features. - mixup_alpha : float - alpha parameter for an optional MixUp augmentation during training. - unlabeled_counts : np.ndarray - [Cells', Genes] of log1p transformed normalized values for - unlabeled observations. - unsup_max_weight : float - maximum weight for the unsupervised loss term. - unsup_mean_teacher : bool - use a mean teacher for pseudolabel generation. 
- ssl_method : str - semi-supervised learning method to use. - ssl_kwargs : dict - arguments passed to the semi-supervised learning loss. - balanced_classes : bool - perform class balancing by undersampling majority classes. - weighted_classes : bool - weight loss for each class based on relative abundance of classes - in the training data. - input_domain : np.ndarray - [Cells,] integer domain labels for training data. - unlabeled_domain : np.ndarray - [Cells',] integer domain labels for unlabeled data. - pretrained : str - path to a pretrained model for initialization. - default: `None`. - patience : int - number of epochs to wait before early stopping. - `None` deactivates early stopping. - save_freq : int - frequency in epochs for saving model checkpoints. - if `None`, saves >=5 checkpoints per model. - tensorboard : bool - save logs to tensorboard. - - Returns - ------- - test_acc : float - classification accuracy on the test set. - test_loss : float - supervised loss on the test set. - """ - # count the number of cell types available - n_cell_types = len(np.unique(y)) - if n_genes is None: - n_genes = X.shape[1] - - if type(traintest_idx) != tuple: - # Set aside 10% of the traintest data for model selection in `test_idx` - train_idx = np.random.choice( - traintest_idx, - size=int(np.floor(0.9 * len(traintest_idx))), - replace=False, - ).astype("int") - test_idx = np.setdiff1d(traintest_idx, train_idx).astype("int") - elif type(traintest_idx) == tuple and len(traintest_idx) == 2: - # use the user provided train/test split - train_idx = traintest_idx[0] - test_idx = traintest_idx[1] - else: - # the user supplied an invalid argument - msg = "`traintest_idx` of type {type(traintest_idx)}\n" - msg += "and length {len(traintest_idx)} is invalid." - raise ValueError(msg) - - # save indices to CSVs for later retrieval - np.savetxt(osp.join(out_path, "train_idx.csv"), train_idx) - np.savetxt(osp.join(out_path, "test_idx.csv"), test_idx) - np.savetxt(osp.join(out_path, "val_idx.csv"), val_idx) - - # balance or weight classes if applicable - if balanced_classes and weighted_classes: - msg = "balancing AND weighting classes is not useful." - msg += "\nPick one mode of accounting for class imbalances." - raise ValueError(msg) - elif balanced_classes and not weighted_classes: - print("Setting up a stratified sampler...") - # we sample classes with weighted likelihood, rather than - # a uniform likelihood of sampling - # we use the inverse of the class count as a weight - # this is normalized in `WeightedRandomSample` - classes, counts = np.unique(y[train_idx], return_counts=True) - sample_weights = 1.0 / counts - - # `WeightedRandomSampler` is kind of funny and takes a weight - # **per example** in the training set, rather than per class. - # here we assign the appropriate class weight to each sample - # in the training set. - weight_per_example = sample_weights[y[train_idx]] - - # we instantiate the sampler with the relevant weight for - # each observation and set the number of total samples to the - # number of samples in our training set - # `WeightedRandomSampler` will sample indices from a multinomial - # with probabilities computed from the normalized vector - # of `weights_per_example`. 
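        # (Editor's note, worked example with made-up counts: if y[train_idx]
        #  contains classes [0, 1, 2] with counts [900, 90, 10], then
        #  sample_weights = [1/900, 1/90, 1/10], so a cell from the rare
        #  class 2 is ~90x more likely to be selected per draw than a cell
        #  from class 0 and minibatches become approximately class-balanced.)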
- sampler = torch.utils.data.sampler.WeightedRandomSampler( - weight_per_example, - len(y[train_idx]), - ) - class_weight = None - elif weighted_classes and not balanced_classes: - # compute class weights - # class weights amplify the loss of some classes and reduce - # the loss of others, inversely proportional to the class - # frequency - print("Weighting classes for training...") - class_weight = get_class_weight(y[train_idx]) - print(class_weight) - print() - sampler = None - else: - print("Not weighting classes and not balancing classes.") - class_weight = None - sampler = None - - # Generate training and model selection Datasets and Dataloaders - X_train = X[train_idx, :] - y_train = y[train_idx] - - X_test = X[test_idx, :] - y_test = y[test_idx] - - # count the number of domains - if ( - (input_domain is None) - and (unlabeled_domain is None) - and (unlabeled_counts is not None) - ): - n_domains = 2 - elif ( - (input_domain is None) - and (unlabeled_domain is None) - and (unlabeled_counts is None) - ): - n_domains = 1 - elif (input_domain is not None) and (unlabeled_domain is None): - input_domain_max = input_domain.max() - n_domains = int(input_domain_max) - elif (input_domain is not None) and (unlabeled_domain is not None): - input_domain_max = input_domain.max() - unlabeled_domain_max = ( - 0 if len(unlabeled_domain) == 0 else unlabeled_domain.max() - ) - n_domains = ( - int( - np.max( - [ - input_domain_max, - unlabeled_domain_max, - ] - ) - ) - + 1 - ) - else: - msg = "domains supplied for only one set of data" - raise ValueError(msg) - print(f"Found {n_domains} unique domains.") - - if input_domain is not None: - d_train = input_domain[train_idx] - d_test = input_domain[test_idx] - else: - d_train = None - d_test = None - - train_ds = SingleCellDS( - X=X_train, - y=y_train, - num_classes=len(np.unique(y)), - domain=d_train, - num_domains=n_domains, - ) - test_ds = SingleCellDS( - X_test, - y_test, - num_classes=len(np.unique(y)), - domain=d_test, - num_domains=n_domains, - ) - logger.debug(f"{len(train_ds)} training samples in DS.") - logger.debug(f"{len(test_ds)} testing samples in DS.") - - train_dl = DataLoader( - train_ds, - batch_size=batch_size, - shuffle=True if sampler is None else False, - sampler=sampler, - drop_last=True, - ) - test_dl = DataLoader( - test_ds, - batch_size=batch_size, - shuffle=True, - ) - logger.debug(f"{len(train_dl)} training samples in DL.") - logger.debug(f"{len(test_dl)} testing samples in DL.") - - dataloaders = { - "train": train_dl, - "val": test_dl, - } - - # Define batch transformers - batch_transformers = {} - if mixup_alpha is not None and ssl_method != "mixmatch": - print("Using MixUp as a batch transformer.") - batch_transformers["train"] = SampleMixUp(alpha=mixup_alpha) - - # Build a cell type classification model and transfer to CUDA - model = ModelClass( - n_genes=n_genes, - n_cell_types=n_cell_types, - **kwargs, - ) - - if pretrained is not None: - # initialize with supplied weights - model.load_state_dict( - torch.load( - pretrained, - map_location="cpu", - ) - ) - - if torch.cuda.is_available(): - model = model.cuda() - - # Set up loss criterion and the model optimizer - # here we use our own cross_entropy loss to handle - # discrete probability distributions rather than - # categorical predictions - if class_weight is None: - criterion = cross_entropy - else: - criterion = partial( - cross_entropy, - class_weight=torch.from_numpy(class_weight).float(), - ) - - opt_callable = OPTIMIZERS[optimizer_name.lower()] - - if 
opt_callable != torch.optim.SGD: - optimizer = opt_callable( - model.parameters(), - weight_decay=weight_decay, - lr=lr, - ) - scheduler = None - else: - # use SGD as the optimizer with momentum - # and a learning rate scheduler - optimizer = opt_callable( - model.parameters(), - weight_decay=weight_decay, - lr=lr, - momentum=0.9, - ) - scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( - optimizer=optimizer, - T_max=n_epochs, - eta_min=lr / 10000, - ) - - # Build the relevant trainer object for either supervised - # or semi-supervised learning with interpolation consistency - trainer_kwargs = { - "model": model, - "criterion": criterion, - "optimizer": optimizer, - "scheduler": scheduler, - "dataloaders": dataloaders, - "out_path": out_path, - "batch_transformers": batch_transformers, - "n_epochs": n_epochs, - "min_epochs": n_epochs // 20, - "save_freq": max(n_epochs // 5, 1) if save_freq is None else save_freq, - "reg_criterion": None, - "exp_name": osp.basename(out_path), - "verbose": False, - "tb_writer": osp.join(out_path, "tblog") if tensorboard else None, - "patience": patience, - } - - if unlabeled_counts is None and (n_domains == 1): - # perform fully supervised training - print("Performing fully supervised training with no domain adaptation.") - T = Trainer(**trainer_kwargs) - elif unlabeled_counts is None and (n_domains > 1): - print("Performing supervised training with a domain adversary.") - # perform supervised training with DA - # use the MultiTaskTrainer - dan_criterion = ssl_kwargs.get("dan_criterion", None) - if dan_criterion is not None: - # initialize the DAN Loss - - dan_criterion = DANLoss( - model=model, - dan_criterion=cross_entropy, - use_conf_pseudolabels=ssl_kwargs.get( - "dan_use_conf_pseudolabels", False - ), - scale_loss_pseudoconf=ssl_kwargs.get( - "dan_scale_loss_pseudoconf", False - ), - n_domains=n_domains, - ) - - # setup the DANN learning rate schedule - dan_weight = ICLWeight( - ramp_epochs=ssl_kwargs.get("dan_ramp_epochs", max(n_epochs // 4, 1)), - max_unsup_weight=ssl_kwargs.get("dan_max_weight", 1.0), - burn_in_epochs=ssl_kwargs.get("dan_burn_in_epochs", 0), - sigmoid=ssl_kwargs.get("sigmoid", True), - ) - # add DANN parameters to the optimizer - optimizer.add_param_group( - { - "params": dan_criterion.dann.domain_clf.parameters(), - "name": "domain_classifier", - } - ) - ce = scNymCrossEntropy() - criteria = [ - {"name": "dan", "function": dan_criterion, "weight": dan_weight}, - {"name": "ce", "function": ce, "weight": 1.0}, - ] - del trainer_kwargs["criterion"] - trainer_kwargs["criteria"] = criteria - T = MultiTaskTrainer(**trainer_kwargs) - else: - # perform semi-supervised training - unsup_dataset = SingleCellDS( - X=unlabeled_counts, - y=np.zeros(unlabeled_counts.shape[0]), - num_classes=len(np.unique(y)), - domain=unlabeled_domain, - num_domains=n_domains, - ) - - # Build a semi-supervised data loader that infinitely samples - # unsupervised data for interpolation consistency. - # This allows us to loop through the labeled data iterator - # without running out of unlabeled batches. 
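        # (Editor's note, illustrative usage of this pattern: after wrapping,
        #      unsup_iter = repeater(unsup_dataloader)
        #      unlabeled_batch = next(unsup_iter)  # never raises StopIteration
        #  can be called once per labeled batch, for any number of epochs,
        #  because `repeater` cycles the loader via `itertools.repeat`.)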
- unsup_dataloader = DataLoader( - unsup_dataset, - batch_size=batch_size, - shuffle=True, - drop_last=True, - ) - unsup_dataloader = repeater(unsup_dataloader) - - # Set up the unsupervised loss - if ssl_method.lower() == "ict": - print("Using ICT for semi-supervised learning") - USL = InterpolationConsistencyLoss( - alpha=mixup_alpha if mixup_alpha is not None else 0.3, - unsup_criterion=nn.MSELoss(), - sup_criterion=criterion, - decay_coef=ssl_kwargs.get("decay_coef", 0.997), - mean_teacher=unsup_mean_teacher, - ) - - elif ssl_method.lower() == "mixmatch": - print("Using MixMatch for semi-supervised learning") - # we want the raw MSE per sample here, rather than the average - # so we set `reduction='none'`. - # this allows us to scale the weight of individual examples - # based on pseudolabel confidence. - unsup_criterion_name = ssl_kwargs.get("unsup_criterion", "mse") - if unsup_criterion_name.lower() == "mse": - unsup_criterion = nn.MSELoss(reduction="none") - elif unsup_criterion_name.lower() in ("crossentropy", "ce"): - unsup_criterion = partial( - cross_entropy, - reduction="none", - ) - USL = MixMatchLoss( - alpha=mixup_alpha if mixup_alpha is not None else 0.3, - unsup_criterion=unsup_criterion, - sup_criterion=criterion, - decay_coef=ssl_kwargs.get("decay_coef", 0.997), - mean_teacher=unsup_mean_teacher, - augment=AUGMENTATION_SCHEMES[ssl_kwargs.get("augment", "log1p_drop")], - n_augmentations=ssl_kwargs.get("n_augmentations", 1), - T=ssl_kwargs.get("T", 0.5), - augment_pseudolabels=ssl_kwargs.get("augment_pseudolabels", True), - pseudolabel_min_confidence=ssl_kwargs.get( - "pseudolabel_min_confidence", 0.0 - ), - ) - else: - msg = f"{ssl_method} is not a valid semi-supervised learning method.\n" - msg += 'must be one of {"ict", "mixmatch"}' - raise ValueError(msg) - - # set up the weight schedule - # we define a number of epochs for ramping, a number to wait - # ("burn_in_epochs") before we start the ramp up, and a maximum - # coefficient value - weight_schedule = ICLWeight( - ramp_epochs=ssl_kwargs.get("ramp_epochs", max(n_epochs // 4, 1)), - max_unsup_weight=unsup_max_weight, - burn_in_epochs=ssl_kwargs.get("burn_in_epochs", 20), - sigmoid=ssl_kwargs.get("sigmoid", False), - ) - # don't let early stopping save checkpoints from before the SSL - # ramp up has started - trainer_kwargs["min_epochs"] = max( - trainer_kwargs["min_epochs"], - weight_schedule.burn_in_epochs + weight_schedule.ramp_epochs // 5, - ) - - # if min_epochs are manually specified, use that number instead - if ssl_kwargs.get("min_epochs", None) is not None: - trainer_kwargs["min_epochs"] = ssl_kwargs["min_epochs"] - - # let the model save weights even if the ramp is - # longer than the total epochs we'll train for - trainer_kwargs["min_epochs"] = min( - trainer_kwargs["min_epochs"], - trainer_kwargs["n_epochs"] - 1, - ) - - dan_criterion = ssl_kwargs.get("dan_criterion", None) - if dan_criterion is not None: - # initialize the DAN Loss - - dan_criterion = DANLoss( - model=model, - dan_criterion=cross_entropy, - use_conf_pseudolabels=ssl_kwargs.get( - "dan_use_conf_pseudolabels", False - ), - scale_loss_pseudoconf=ssl_kwargs.get( - "dan_scale_loss_pseudoconf", False - ), - n_domains=n_domains, - ) - - # setup the DANN learning rate schedule - dan_weight = ICLWeight( - ramp_epochs=ssl_kwargs.get("dan_ramp_epochs", max(n_epochs // 4, 1)), - max_unsup_weight=ssl_kwargs.get("dan_max_weight", 1.0), - burn_in_epochs=ssl_kwargs.get("dan_burn_in_epochs", 0), - sigmoid=ssl_kwargs.get("sigmoid", True), - ) - # add DANN 
parameters to the optimizer - optimizer.add_param_group( - { - "params": dan_criterion.dann.domain_clf.parameters(), - "name": "domain_classifier", - } - ) - else: - dan_weight = None - - # initialize the trainer - T = SemiSupervisedTrainer( - unsup_dataloader=unsup_dataloader, - unsup_criterion=USL, - unsup_weight=weight_schedule, - dan_criterion=dan_criterion, - dan_weight=dan_weight, - **trainer_kwargs, - ) - - print("Training...") - T.train() - print("Training complete.") - print() - - # Perform model evaluation using the best set of weights on the - # totally unseen, held out data. - print("Evaluating model.") - model = ModelClass( - n_genes=n_genes, - n_cell_types=n_cell_types, - **kwargs, - ) - model.load_state_dict( - torch.load( - osp.join(out_path, "00_best_model_weights.pkl"), - ) - ) - model.eval() - - if torch.cuda.is_available(): - model = model.cuda() - - # Build a DataLoader for validation - X_val = X[val_idx, :] - y_val = y[val_idx] - val_ds = SingleCellDS( - X_val, - y_val, - num_classes=len(np.unique(y)), - ) - val_dl = DataLoader( - val_ds, - batch_size=batch_size, - shuffle=False, - ) - - # Without recording any gradients to speed things up, - # predict classes for all held out data and evaluate metrics. - with torch.no_grad(): - loss = 0.0 - running_corrects = 0.0 - running_total = 0.0 - all_predictions = [] - all_labels = [] - for data in val_dl: - input_ = data["input"] - - label_ = data["output"] # one-hot - - if torch.cuda.is_available(): - input_ = input_.cuda() - label_ = label_.cuda() - - # make an integer version of labels for convenience - int_label_ = torch.argmax(label_, 1) - - # Perform forward pass and compute predictions as the - # most likely class - output = model(input_) - _, predictions = torch.max(output, 1) - - corrects = torch.sum( - predictions.detach() == int_label_.detach(), - ) - - l = criterion(output, label_) - loss += float(l.detach().cpu().numpy()) - - running_corrects += float(corrects.item()) - running_total += float(label_.size(0)) - - all_labels.append(int_label_.detach().cpu().numpy()) - - all_predictions.append(predictions.detach().cpu().numpy()) - - norm_loss = loss / len(val_dl) - acc = running_corrects / running_total - print("EVAL LOSS: ", norm_loss) - print("EVAL ACC : ", acc) - - all_predictions = np.concatenate(all_predictions) - all_labels = np.concatenate(all_labels) - np.savetxt(osp.join(out_path, "predictions.csv"), all_predictions) - np.savetxt(osp.join(out_path, "labels.csv"), all_labels) - - PL = np.stack([all_predictions, all_labels], 0) - print("Predictions | Labels") - print(PL.T[:15, :]) - return acc, norm_loss - - -def train_cv( - X: Union[np.ndarray, sparse.csr.csr_matrix], - y: np.ndarray, - batch_size: int, - n_epochs: int, - lr: float, - optimizer_name: str, - weight_decay: float, - ModelClass: nn.Module, - fold_indices: list, - out_path: str, - n_genes: int = None, - mixup_alpha: float = None, - unlabeled_counts: np.ndarray = None, - unsup_max_weight: float = 2.0, - unsup_mean_teacher: bool = False, - ssl_method: str = "mixmatch", - ssl_kwargs: dict = {}, - weighted_classes: bool = False, - balanced_classes: bool = False, - **kwargs, -) -> Tuple[np.ndarray, np.ndarray]: - """Perform training using a provided set of training/hold-out - sample indices. - - Parameters - ---------- - X : np.ndarray - [Cells, Genes] of log1p transformed normalized values. - log1p and normalization performed using scanpy defaults. - y : np.ndarray - [Cells,] integer class labels. - n_epochs : int - number of epochs for training. 
- weight_decay : float - weight decay to apply to model weights. - lr : float - learning rate. - optimizer_name : str - optimizer to use. {"adadelta", "adam"}. - ModelClass : nn.Module - a model class for construction classification models. - batch_size : int - batch size for training. - fold_indices : list - elements are 2-tuple, with training indices and held-out. - out_path : str - top level path for saving fold outputs. - n_genes : int - number of genes in the input. Not necessarily `X.shape[1]` if - the input matrix has been concatenated with other features. - mixup_alpha : float - alpha parameter for an optional MixUp augmentation during training. - unsup_max_weight : float - maximum weight for the unsupervised loss term. - unsup_mean_teacher : bool - use a mean teacher for pseudolabel generation. - ssl_method : str - semi-supervised learning method to use. - ssl_kwargs : dict - arguments passed to the semi-supervised learning loss. - balanced_classes : bool - perform class balancing by undersampling majority classes. - weighted_classes : bool - weight loss for each class based on relative abundance of classes - in the training data. - - Returns - ------- - fold_eval_acc : np.ndarray - evaluation accuracies for each fold. - fold_eval_losses : np.ndarray - loss values for each fold. - """ - fold_eval_losses = np.zeros(len(fold_indices)) - fold_eval_acc = np.zeros(len(fold_indices)) - - # Perform training on each fold specified in `fold_indices` - for f in range(len(fold_indices)): - print("Training tissue independent, fold %d." % f) - fold_out_path = osp.join(out_path, "fold" + str(f).zfill(2)) - - os.makedirs(fold_out_path, exist_ok=True) - - traintest_idx = fold_indices[f][0].astype("int") - val_idx = fold_indices[f][1].astype("int") - - acc, loss = fit_model( - X=X, - y=y, - traintest_idx=traintest_idx, - val_idx=val_idx, - out_path=fold_out_path, - batch_size=batch_size, - n_epochs=n_epochs, - ModelClass=ModelClass, - n_genes=n_genes, - lr=lr, - optimizer_name=optimizer_name, - weight_decay=weight_decay, - mixup_alpha=mixup_alpha, - unlabeled_counts=unlabeled_counts, - unsup_max_weight=unsup_max_weight, - unsup_mean_teacher=unsup_mean_teacher, - ssl_method=ssl_method, - ssl_kwargs=ssl_kwargs, - weighted_classes=weighted_classes, - balanced_classes=balanced_classes, - **kwargs, - ) - - fold_eval_losses[f] = loss - fold_eval_acc[f] = acc - return fold_eval_acc, fold_eval_losses - - -def train_all( - X: Union[np.ndarray, sparse.csr.csr_matrix], - y: np.ndarray, - batch_size: int, - n_epochs: int, - ModelClass: nn.Module, - out_path: str, - n_genes: int = None, - lr: float = 1.0, - optimizer_name: str = "adadelta", - weight_decay: float = None, - mixup_alpha: float = None, - unlabeled_counts: np.ndarray = None, - unsup_max_weight: float = 2.0, - unsup_mean_teacher: bool = False, - ssl_method: str = "mixmatch", - ssl_kwargs: dict = {}, - weighted_classes: bool = False, - balanced_classes: bool = False, - **kwargs, -) -> Tuple[float, float]: - """Perform training using all provided samples. - - Parameters - ---------- - X : np.ndarray - [Cells, Genes] of log1p transformed normalized values. - log1p and normalization performed using scanpy defaults. - y : np.ndarray - [Cells,] integer class labels. - n_epochs : int - number of epochs for training. - ModelClass : nn.Module - a model class for construction classification models. - batch_size : int - batch size for training. - out_path : str - top level path for saving fold outputs. - n_genes : int - number of genes in the input. 
Not necessarily `X.shape[1]` if - the input matrix has been concatenated with other features. - lr : float - learning rate. - optimizer_name : str - optimizer to use. {"adadelta", "adam"}. - weight_decay : float - weight decay to apply to model weights. - balanced_classes : bool - perform class balancing by undersampling majority classes. - weighted_classes : bool - weight loss for each class based on relative abundance of classes - in the training data. - - Returns - ------- - loss : float - best loss on the testing set used for model selection. - acc : float - best accuracy on the testing set used for model selection. - """ - # Prepare a unique output directory - all_out_path = osp.join(out_path, "all_data") - if not osp.exists(all_out_path): - os.mkdir(all_out_path) - - # Generate training and model selection indices - traintest_idx = np.random.choice( - np.arange(X.shape[0]), - size=int(np.floor(0.9 * X.shape[0])), - replace=False, - ).astype("int") - val_idx = np.setdiff1d( - np.arange(X.shape[0]), - traintest_idx, - ).astype("int") - - acc, loss = fit_model( - X=X, - y=y, - traintest_idx=traintest_idx, - val_idx=val_idx, - batch_size=batch_size, - n_epochs=n_epochs, - ModelClass=ModelClass, - out_path=all_out_path, - n_genes=n_genes, - lr=lr, - optimizer_name=optimizer_name, - weight_decay=weight_decay, - mixup_alpha=mixup_alpha, - unlabeled_counts=unlabeled_counts, - unsup_max_weight=unsup_max_weight, - unsup_mean_teacher=unsup_mean_teacher, - ssl_method=ssl_method, - ssl_kwargs=ssl_kwargs, - weighted_classes=weighted_classes, - balanced_classes=balanced_classes, - **kwargs, - ) - - np.savetxt( - osp.join(all_out_path, "test_loss_acc.csv"), - np.array([loss, acc]).reshape(2, 1), - delimiter=",", - ) - - return loss, acc - - -def train_tissue_independent_cv( - X: Union[np.ndarray, sparse.csr.csr_matrix], - metadata: pd.DataFrame, - out_path: str, - balanced_classes: bool = False, - weighted_classes: bool = False, - batch_size: int = 256, - n_epochs: int = 200, - lower_group: str = "cell_ontology_class", - **kwargs, -) -> None: - """ - Trains a cell type classifier that is independent of tissue origin - - Parameters - ---------- - X : np.ndarray - [Cells, Genes] of log1p transformed, normalized values. - log1p and normalization performed using scanpy defaults. - metadata : pd.DataFrame - [Cells, Features] data with `upper_group` and `lower_group` columns. - out_path : str - path for saving trained model weights and evaluation performance. - balanced_classes : bool - perform class balancing by undersampling majority classes. - weighted_classes : bool - weight loss for each class based on relative abundance of classes - in the training data. - batch_size : int - batch size for training. - n_epochs : int - number of epochs for training. - lower_group : str - column in `metadata` corresponding to output classes. i.e. cell types. - - Returns - ------- - None. - - Notes - ----- - Passes `kwargs` to `CellTypeCLF`. 
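    Examples
    --------
    A hypothetical invocation; the matrix, metadata frame, and output path
    below are placeholders:

    >>> train_tissue_independent_cv(
    ...     X=log1p_cpm_matrix,
    ...     metadata=cell_metadata,
    ...     out_path="./scnym_out/tissue_independent",
    ...     weighted_classes=True,
    ...     batch_size=256,
    ...     n_epochs=100,
    ...     lower_group="cell_ontology_class",
    ... )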
- """ - - print("TRAINING TISSUE INDEPENDENT CLASSIFIER") - print("-" * 20) - print() - - if not os.path.exists(out_path): - os.mkdir(out_path) - - # identify all the `lower_group` levels and create - # an integer class vector corresponding to unique levels - celltypes = sorted(list(set(metadata[lower_group]))) - print("There are %d %s in the experiment.\n" % (len(celltypes), lower_group)) - - for t in celltypes: - print(t) - - # identify all the `lower_group` levels and create - # an integer class vector corresponding to unique levels - y = pd.Categorical(metadata[lower_group]).codes - y = y.astype("int32") - labels = pd.Categorical(metadata[lower_group]).categories - # save mapping of levels : integer values as a CSV - out_df = pd.DataFrame({"label": labels, "code": np.arange(len(labels))}) - out_df.to_csv(osp.join(out_path, "celltype_label.csv")) - - # generate k-fold cross-validation split indices - # & vectors for metrics evaluated at each fold. - kf = StratifiedKFold(n_splits=5, shuffle=True) - kf_indices = list(kf.split(X, y)) - - # Perform training on each fold specified in `kf_indices` - fold_eval_acc, fold_eval_losses = train_cv( - X=X, - y=y, - batch_size=batch_size, - n_epochs=n_epochs, - ModelClass=CellTypeCLF, - fold_indices=kf_indices, - out_path=out_path, - balanced_classes=balanced_classes, - weighted_classes=weighted_classes, - **kwargs, - ) - - # Save the per-fold results to CSVs - - print("Fold eval losses") - print(fold_eval_losses) - print("Fold eval accuracy") - print(fold_eval_acc) - print("Mean %f Std %f" % (fold_eval_losses.mean(), fold_eval_losses.std())) - np.savetxt( - osp.join( - out_path, - "fold_eval_losses.csv", - ), - fold_eval_losses, - ) - np.savetxt( - osp.join( - out_path, - "fold_eval_acc.csv", - ), - fold_eval_acc, - ) - - # Train a model using all available data (after class balancing) - val_loss, val_acc = train_all( - X=X, - y=y, - batch_size=batch_size, - n_epochs=n_epochs, - ModelClass=CellTypeCLF, - out_path=out_path, - balanced_classes=balanced_classes, - weighted_classes=weighted_classes, - **kwargs, - ) - - return - - -def train_one_tissue_cv( - X: Union[np.ndarray, sparse.csr.csr_matrix], - metadata: pd.DataFrame, - out_path: str, - balanced_classes: bool = False, - weighted_classes: bool = False, - batch_size: int = 256, - n_epochs: int = 200, - upper_group: str = "tissue", - lower_group: str = "cell_ontology_class", - **kwargs, -) -> None: - """ - Trains a cell type classifier for a single tissue - - Parameters - ---------- - X : np.ndarray - [Cells, Genes] of log1p transformed, normalized values. - log1p and normalization performed using scanpy defaults. - metadata : pd.DataFrame - [Cells, Features] data with `upper_group` and `lower_group` columns. - out_path : str - path for saving trained model weights and evaluation performance. - balanced_classes : bool, optional - perform class balancing by undersampling majority classes. - weighted_classes : bool - weight loss for each class based on relative abundance of classes - in the training data. - upper_group : str - column in `metadata` with subsets for training `lower_group` - classifiers independently. i.e. tissues. - lower_group : str - column in `metadata` corresponding to output classes. i.e. cell types. - - Returns - ------- - None. 
- """ - - tissue_str = str(list(metadata[upper_group])[0]).lower() - print( - "TRAINING %s DEPENDENT CLASSIFIER FOR: " % upper_group.upper(), - tissue_str.upper(), - ) - print("-" * 20) - print() - - celltypes = sorted(list(set(metadata[lower_group]))) - print("There are %d %s in the experiment.\n" % (len(celltypes), lower_group)) - for t in celltypes: - print(t) - print("") - y = pd.Categorical(metadata[lower_group]).codes - y = y.astype("int32") - labels = pd.Categorical(metadata[lower_group]).categories - out_df = pd.DataFrame({"label": labels, "code": np.arange(len(labels))}) - out_df.to_csv(osp.join(out_path, "celltype_label.csv")) - - kf = StratifiedKFold(n_splits=5, shuffle=True) - kf_indices = list(kf.split(X, y)) - - # Perform training on each fold specified in `kf_indices` - fold_eval_acc, fold_eval_losses = train_cv( - X=X, - y=y, - batch_size=batch_size, - n_epochs=n_epochs, - ModelClass=CellTypeCLF, - fold_indices=kf_indices, - out_path=out_path, - weighted_classes=weighted_classes, - balanced_classes=balanced_classes, - **kwargs, - ) - - print("Fold eval losses") - print(fold_eval_losses) - print("Fold eval accuracy") - print(fold_eval_acc) - print("Mean %f Std %f" % (fold_eval_losses.mean(), fold_eval_losses.std())) - np.savetxt( - osp.join( - out_path, - "fold_eval_losses.csv", - ), - fold_eval_losses, - ) - np.savetxt( - osp.join( - out_path, - "fold_eval_acc.csv", - ), - fold_eval_acc, - ) - - # Train a model using all available data (after class balancing) - val_loss, val_acc = train_all( - X=X, - y=y, - batch_size=batch_size, - n_epochs=n_epochs, - ModelClass=CellTypeCLF, - out_path=out_path, - weighted_classes=weighted_classes, - balanced_classes=balanced_classes, - **kwargs, - ) - return - - -######################################################### -# Predict cell types with a trained model -######################################################### - - -def predict_cell_types( - X: Union[np.ndarray, sparse.csr.csr_matrix], - model_path: str, - out_path: str, - upper_groups: Union[list, np.ndarray] = None, - lower_group_labels: list = None, - **kwargs, -) -> None: - """Predict cell types using a pretrained model - - Parameters - ---------- - X : np.ndarray, sparse.csr.csr_matrix - [Cells, Genes] of log1p transformed, normalized values. - log1p and normalization performed using scanpy defaults. - model_path : str - path to a set of pretrained model weights. - out_path : str - path for prediction outputs. - upper_groups : list, np.ndarray - [Cells,] iterable of str specifying the `upper_group` for each cell. - if provided, assumes an `upper_group` conditional model. - if `None`, assumes an `upper_group` independent model. - lower_group_labels : list - str labels corresponding to output nodes of the model. - - Returns - ------- - None. - - Notes - ----- - `**kwargs` passed to `scnym.predict.Predicter`. 
- """ - if upper_groups is not None: - print("Assuming conditional model.") - - X, categories = utils.append_categorical_to_data(X, upper_groups) - np.savetxt( - osp.join(out_path, "category_names.csv"), - categories, - fmt="%s", - delimiter=",", - ) - else: - print("Assuming independent model") - - # Intantiate a prediction object, which handles batch processing - P = Predicter( - model_weights=model_path, - n_genes=X.shape[1], - n_cell_types=None, # infer cell type # from weights - labels=lower_group_labels, - **kwargs, - ) - - predictions, names, scores = P.predict(X, output="score") - - probabilities = F.softmax(torch.from_numpy(scores), dim=1) - probabilities = probabilities.cpu().numpy() - - np.savetxt(osp.join(out_path, "predictions_idx.csv"), predictions, delimiter=",") - np.savetxt(osp.join(out_path, "probabilities.csv"), probabilities, delimiter=",") - np.savetxt(osp.join(out_path, "raw_scores.csv"), scores, delimiter=",") - if names is not None: - np.savetxt( - osp.join(out_path, "predictions_names.csv"), names, delimiter=",", fmt="%s" - ) - return - - -######################################################### -# utilities -######################################################### - - -def load_data( - path: str, -) -> Union[np.ndarray, sparse.csr.csr_matrix]: - """Load a counts matrix from a file path. - - Parameters - ---------- - path : str - path to [npy, csv, h5ad, loom] file. - - Returns - ------- - X : np.ndarray - [Cells, Genes] matrix. - """ - if osp.splitext(path)[-1] == ".npy": - print("Assuming sparse matrix...") - X_raw = np.load(path, allow_pickle=True) - X_raw = X_raw.item() - elif osp.splitext(path)[-1] == ".csv": - X_raw = np.loadtxt(path, delimiter=",") - elif osp.splitext(path)[-1] == ".h5ad": - adata = sc.read_h5ad(path) - X_raw = utils.get_adata_asarray(adata=adata) - elif osp.splitext(path)[-1] == ".loom": - adata = sc.read_loom(path) - X_raw = utils.get_adata_asarray(adata=adata) - else: - raise ValueError( - "unrecognized file type %s for counts" % osp.splitext(path)[-1] - ) - - return X_raw - - -######################################################### -# main() -######################################################### - - -def main(): - import configargparse - import yaml - - parser = configargparse.ArgParser( - description="Train cell type classifiers", - default_config_files=["./configs/default_config.txt"], - ) - parser.add_argument( - "command", - type=str, - help='action to perform. \ - ["train_tissue_independent", \ - "train_tissue_dependent", \ - "train_tissue_specific", \ - "find_cell_type_markers", \ - "predict_cell_types"]', - ) - parser.add_argument( - "-c", is_config_file=True, required=False, help="path to a configuration file." - ) - parser.add_argument( - "--input_counts", - type=str, - required=True, - help="path to input data [Cells, Genes] counts. \ - [npy, csv, h5ad, loom]", - ) - parser.add_argument( - "--input_gene_names", - type=str, - required=True, - help="path to gene names for the input data.", - ) - parser.add_argument( - "--training_gene_names", - type=str, - required=False, - help="path to training data gene names. \ - required for prediction.", - ) - parser.add_argument( - "--training_metadata", - type=str, - required=True, - help="CSV metadata for training. Requires `upper_group` and `lower_group` columns. \ - necessary for prediction to provide cell type names.", - ) - parser.add_argument( - "--lower_group", - type=str, - required=True, - default="cell_ontology_class", - help="column in `metadata` with to output labels. 
\ - i.e. cell types.", - ) - parser.add_argument( - "--upper_group", - type=str, - required=True, - default="tissue", - help="column in `metadata` with to subsets for independent training. \ - i.e. tissues.", - ) - parser.add_argument( - "--out_path", type=str, required=True, help="path for output files" - ) - parser.add_argument( - "--genes_to_use", - type=str, - default=None, - help="path to a text file of genes to use for training. \ - must be a subset of genes in `training_gene_names`", - ) - parser.add_argument( - "--input_domain_group", - type=str, - help="column in `training_metadata` that specifies domain of origin for each training observation.", - required=False, - default=None, - ) - parser.add_argument( - "--batch_size", type=int, default=256, help="batch size for training" - ) - parser.add_argument( - "--n_epochs", type=int, default=256, help="number of epochs for training" - ) - parser.add_argument( - "--init_dropout", - type=float, - default=0.3, - help="initial dropout to perform on gene inputs", - ) - parser.add_argument( - "--n_hidden", - type=int, - default=128, - help="number of hidden units in the classifier", - ) - parser.add_argument( - "--n_layers", type=int, default=2, help="number of hidden layers in the model" - ) - parser.add_argument( - "--residual", action="store_true", help="use residual layers in the model" - ) - parser.add_argument( - "--track_running_stats", - type=bool, - default=True, - help="track running statistics in batch normalization layers", - ) - parser.add_argument( - "--model_path", - type=str, - default=None, - help="path to pretrained model weights \ - for class marker identification.", - ) - parser.add_argument( - "--weight_decay", - type=float, - default=1e-5, - help="weight decay applied by the optimizer", - ) - parser.add_argument( - "--lr", type=float, default=1.0, help="learning rate for the optimizer." - ) - parser.add_argument( - "--optimizer", - type=str, - default="adadelta", - help="optimizer to use. {adadelta, adam}.", - ) - parser.add_argument( - "--l1_reg", - type=float, - default=1e-4, - help="l1 regularization strength \ - for class marker identification", - ) - parser.add_argument( - "--weight_classes", - type=bool, - default=False, - help="weight loss based on relative class abundance.", - ) - parser.add_argument( - "--balance_classes", type=bool, default=False, help="perform class balancing." - ) - parser.add_argument( - "--mixup_alpha", - type=float, - default=None, - help="alpha parameter for MixUp training. \ - if set performs MixUp, otherwise does not.", - ) - parser.add_argument( - "--unlabeled_counts", - type=str, - default=None, - help="path to unlabeled data [Cells, Genes]. \ - [npy, csv, h5ad, loom]. 
\ - if provided, uses interpolation consistency training.", - ) - parser.add_argument( - "--unlabeled_genes", - type=str, - default=None, - help="path to gene names for the unlabeled data.\ - if not provided, assumes same as `input_counts`.", - ) - parser.add_argument( - "--unlabeled_domain", - type=str, - help="path to a CSV of integer domain labels for each data point in `unlabeled_counts`.", - required=False, - default=None, - ) - parser.add_argument( - "--unsup_max_weight", - type=float, - default=2.0, - help="maximum weight for the unsupervised component of IC training.", - ) - parser.add_argument( - "--unsup_mean_teacher", - action="store_true", - help="use a mean teacher for IC training.", - ) - parser.add_argument( - "--ssl_method", - type=str, - default="mixmatch", - help='semi-supervised learning method to use. {"mixmatch", "ict"}.', - ) - parser.add_argument( - "--ssl_config", - type=str, - default=None, - help="path to a YAML configuration file of kwargs for the SSL method.", - ) - args = parser.parse_args() - - print(args) - print(parser.format_values()) - - COMMANDS = [ - "train_tissue_independent", - "train_tissue_dependent", - "train_tissue_specific", - "predict_cell_types", - ] - - if args.command not in COMMANDS: - raise ValueError("%s is not a valid command." % args.command) - - ##################################### - # LOAD DATA - ##################################### - - X_raw = load_data(args.input_counts) - - print("Loaded data.") - print("%d cells and %d genes in raw data." % X_raw.shape) - gene_names = np.loadtxt(args.input_gene_names, dtype="str") - print("Loaded gene names for the raw data. %d genes." % len(gene_names)) - - if args.genes_to_use is not None: - genes_to_use = np.loadtxt(args.genes_to_use, dtype="str") - print( - "Using a subset of %d genes as specified in \n %s." - % (len(genes_to_use), args.genes_to_use) - ) - else: - genes_to_use = gene_names - - if args.genes_to_use is not None: - # Filter the input matrix to use only the specified genes - print("Using %d genes for classification." % len(genes_to_use)) - gnl = gene_names.tolist() - keep_idx = np.array([gnl.index(x) for x in genes_to_use]) - X = X_raw[:, keep_idx] - else: - # leave all genes in the matrix - X = X_raw - - # Load metadata and identify output classes - metadata = pd.read_csv( - args.training_metadata, - ) - lower_groups = np.unique(metadata[args.lower_group]).tolist() - - # load domain labels if applicable - if args.input_domain_group is not None: - if args.input_domain_group not in metadata.columns: - msg = f"{args.input_domain_group} is not a column in `training_metadata`" - raise ValueError(msg) - else: - input_domain = np.array(metadata[args.input_domain_group]) - else: - input_domain = None - - # Load any provided unlabeled data for semi-supervised learning - if args.unlabeled_counts is not None: - unlabeled_counts = load_data(args.unlabeled_counts) - print("%d cells, %d genes in unlabeled data." 
% unlabeled_counts.shape) - - # parse any semi-supervised learning specific parameters - if args.ssl_config is not None: - print(f"Loading Semi-Supervised Learning parameters for {args.ssl_method}") - with open(args.ssl_config, "r") as f: - ssl_kwargs = yaml.load(f, Loader=yaml.Loader) - print("SSL kwargs:") - for k, v in ssl_kwargs.items(): - print(f"{k}\t\t:\t\t{v}") - print() - else: - ssl_kwargs = {} - - else: - unlabeled_counts = None - ssl_kwargs = {} - - if args.unlabeled_genes is not None and unlabeled_counts is not None: - # Contruct a matrix using the unlabeled counts where columns - # correspond to the same gene in `input_counts`. - print("Subsetting unlabeled counts to genes used for training...") - unlabeled_genes = np.loadtxt( - args.unlabeled_genes, - delimiter=",", - dtype="str", - ) - unlabeled_counts = utils.build_classification_matrix( - X=unlabeled_counts, - model_genes=genes_to_use, - sample_genes=unlabeled_genes, - ) - if args.unlabeled_domain is not None: - unlabeled_domain = np.loadtxt( - args.unlabeled_domain, - ).astype(np.int) - else: - unlabeled_domain = None - else: - unlabeled_domain = None - - # prepare output paths - if not os.path.exists(args.out_path): - os.mkdir(args.out_path) - - sub_dirs = [ - "tissues", - "tissue_independent_no_dropout", - "tissue_dependent", - "tissue_ind_class_optimums", - ] - for sd in sub_dirs: - if not os.path.exists(osp.join(args.out_path, sd)): - os.mkdir(osp.join(args.out_path, sd)) - - ##################################### - # TISSUE INDEPENDENT CLASSIFIERS - ##################################### - - if args.command == "train_tissue_independent": - train_tissue_independent_cv( - X, - metadata, - osp.join(args.out_path, "tissue_independent"), - balanced_classes=args.balance_classes, - weighted_classes=args.weight_classes, - batch_size=args.batch_size, - n_epochs=args.n_epochs, - init_dropout=args.init_dropout, - lower_group=args.lower_group, - n_hidden=args.n_hidden, - n_layers=args.n_layers, - lr=args.lr, - optimizer_name=args.optimizer, - weight_decay=args.weight_decay, - residual=args.residual, - track_running_stats=args.track_running_stats, - mixup_alpha=args.mixup_alpha, - unlabeled_counts=unlabeled_counts, - unsup_max_weight=args.unsup_max_weight, - unsup_mean_teacher=args.unsup_mean_teacher, - ssl_method=args.ssl_method, - ssl_kwargs=ssl_kwargs, - input_domain=input_domain, - unlabeled_domain=unlabeled_domain, - ) - - ##################################### - # PRETRAINED MODEL PREDICTION - ##################################### - - if args.command == "predict_cell_types": - if args.model_path is None: - raise ValueError("`model_path` required.") - if args.training_gene_names is None: - raise ValueError("must supply `training_gene_names`.") - training_genes = np.loadtxt( - args.training_gene_names, delimiter=",", dtype="str" - ).tolist() - - X = utils.build_classification_matrix( - X=X, - model_genes=training_genes, - sample_genes=gene_names, - ) - - predict_cell_types( - X, - model_path=args.model_path, - out_path=args.out_path, - lower_group_labels=lower_groups, - n_hidden=args.n_hidden, - n_layers=args.n_layers, - residual=args.residual, - ) - - -######################################################### -# __main__ -######################################################### - - -if __name__ == "__main__": - - main() diff --git a/build/lib/scnym/model.py b/build/lib/scnym/model.py deleted file mode 100644 index e94dde1..0000000 --- a/build/lib/scnym/model.py +++ /dev/null @@ -1,603 +0,0 @@ -import torch -import torch.nn as 
nn -from typing import Callable, Iterable, Union, Tuple -import logging - -logger = logging.getLogger(__name__) - - -class ResBlock(nn.Module): - """Residual block. - - References - ---------- - Deep Residual Learning for Image Recognition - Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun - arXiv:1512.03385 - """ - - def __init__( - self, - n_inputs: int, - n_hidden: int, - ) -> None: - """Residual block with fully-connected neural network - layers. - - Parameters - ---------- - n_inputs : int - number of input dimensions. - n_hidden : int - number of hidden dimensions in the Residual Block. - - Returns - ------- - None. - """ - super(ResBlock, self).__init__() - - self.n_inputs = n_inputs - self.n_hidden = n_hidden - - # Build the initial projection layer - self.linear00 = nn.Linear(self.n_inputs, self.n_hidden) - self.norm00 = nn.BatchNorm1d(num_features=self.n_hidden) - self.relu00 = nn.ReLU(inplace=True) - - # Map from the latent space to output space - self.linear01 = nn.Linear(self.n_hidden, self.n_hidden) - self.norm01 = nn.BatchNorm1d(num_features=self.n_hidden) - self.relu01 = nn.ReLU(inplace=True) - return - - def forward( - self, - x: torch.FloatTensor, - ) -> torch.FloatTensor: - """Residual block forward pass. - - Parameters - ---------- - x : torch.FloatTensor - [Batch, self.n_inputs] - - Returns - ------- - o : torch.FloatTensor - [Batch, self.n_hidden] - """ - identity = x - - # Project input to the latent space - o = self.norm00(self.linear00(x)) - o = self.relu00(o) - - # Project from the latent space to output space - o = self.norm01(self.linear01(o)) - - # Make this a residual connection - # by additive identity operation - o += identity - return self.relu01(o) - - -class CellTypeCLF(nn.Module): - """Cell type classifier from expression data. - - Attributes - ---------- - n_genes : int - number of input genes in the model. - n_cell_types : int - number of output classes in the model. - n_hidden : int - number of hidden units in the model. - n_layers : int - number of hidden layers in the model. - init_dropout : float - dropout proportion prior to the first layer. - residual : bool - use residual connections. - """ - - def __init__( - self, - n_genes: int, - n_cell_types: int, - n_hidden: int = 256, - n_hidden_init: int = 256, - n_layers: int = 2, - init_dropout: float = 0.0, - residual: bool = False, - batch_norm: bool = True, - track_running_stats: bool = True, - n_decoder_layers: int = 0, - use_raw_counts: bool = False, - ) -> None: - """ - Cell type classifier from expression data. - Linear layers with batch norm and dropout. - - Parameters - ---------- - n_genes : int - number of genes in the input - n_cell_types : int - number of cell types for the output - n_hidden : int - number of hidden unit - n_hidden_init : - number of hidden units for the initial encoding layer. - n_layers : int - number of hidden layers. - init_dropout : float - dropout proportion prior to the first layer. - residual : bool - use residual connections. - batch_norm : bool - use batch normalization in hidden layers. - track_running_stats : bool - track running statistics in batch norm layers. - n_decoder_layers : int - number of layers in the decoder. - use_raw_counts : bool - provide raw counts as input. - - Returns - ------- - None. 
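Examples
--------
A minimal construction sketch; the gene and class counts are illustrative
placeholders rather than values tied to any pretrained scNym model.

>>> clf = CellTypeCLF(n_genes=1000, n_cell_types=10)
>>> x = torch.randn(8, 1000)                  # [Batch, n_genes]
>>> logits = clf(x)                           # [Batch, n_cell_types]
>>> logits, embed = clf(x, return_embed=True)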
- """ - super(CellTypeCLF, self).__init__() - - self.n_genes = n_genes - self.n_cell_types = n_cell_types - self.n_hidden = n_hidden - self.n_hidden_init = n_hidden_init - self.n_decoder_layers = n_decoder_layers - self.n_layers = n_layers - self.init_dropout = init_dropout - self.residual = residual - self.batch_norm = batch_norm - self.track_running_stats = track_running_stats - self.use_raw_counts = use_raw_counts - - # simulate technical dropout of scRNAseq - self.init_dropout = nn.Dropout(p=self.init_dropout) - - # Define a vanilla NN layer with batch norm, dropout, ReLU - vanilla_layer = [ - nn.Linear(self.n_hidden, self.n_hidden), - ] - if self.batch_norm: - vanilla_layer += [ - nn.BatchNorm1d( - num_features=self.n_hidden, - track_running_stats=self.track_running_stats, - ), - ] - vanilla_layer += [ - nn.Dropout(), - nn.ReLU(inplace=True), - ] - - # Define a residual NN layer with batch norm, dropout, ReLU - residual_layer = [ - ResBlock(self.n_hidden, self.n_hidden), - ] - if self.batch_norm: - residual_layer += [ - nn.BatchNorm1d( - num_features=self.n_hidden, - track_running_stats=self.track_running_stats, - ), - ] - - residual_layer += [ - nn.Dropout(), - nn.ReLU(inplace=True), - ] - - # Build the intermediary layers of the model - if self.residual: - hidden_layer = residual_layer - else: - hidden_layer = vanilla_layer - - hidden_layers = hidden_layer * (self.n_layers - 1) - - # Build the classifier `nn.Module`. - self.embed = nn.Sequential( - nn.Linear(self.n_genes, self.n_hidden_init), - nn.BatchNorm1d( - num_features=self.n_hidden_init, - track_running_stats=self.track_running_stats, - ), - nn.Dropout(), - nn.ReLU(inplace=True), - nn.Linear(self.n_hidden_init, self.n_hidden), - nn.BatchNorm1d( - num_features=self.n_hidden, - track_running_stats=self.track_running_stats, - ), - nn.Dropout(), - nn.ReLU(inplace=True), - *hidden_layers, - ) - - dec_hidden = hidden_layer * (self.n_decoder_layers - 1) - final_clf = nn.Linear(self.n_hidden, self.n_cell_types) - self.classif = nn.Sequential( - *dec_hidden, - final_clf, - ) - return - - def forward( - self, - x: torch.FloatTensor, - return_embed: bool = False, - ) -> torch.FloatTensor: - """Perform a forward pass through the model - - Parameters - ---------- - x : torch.FloatTensor - [Batch, self.n_genes] - return_embed : bool - return the embedding and the class predictions. - - Returns - ------- - pred : torch.FloatTensor - [Batch, self.n_cell_types] - embed : torch.FloatTensor, optional - [Batch, n_hidden], only returned if `return_embed`. - """ - # add initial dropout noise - if self.init_dropout.p > 0 and not self.use_raw_counts: - # counts are log1p(CPM) - # expm1 to normed counts - x = torch.expm1(x) - x = self.init_dropout(x) - # renorm to log1p CPM - size = torch.sum(x, dim=1).reshape(-1, 1) - prop_input_ = x / size - norm_input_ = prop_input_ * 1e6 - x = torch.log1p(norm_input_) - elif self.init_dropout.p > 0 and self.use_raw_counts: - x = self.init_dropout(x) - else: - # we don't need to do initial dropout - pass - x_embed = self.embed(x) - pred = self.classif(x_embed) - - if return_embed: - r = ( - pred, - x_embed, - ) - else: - r = pred - return r - - -class GradReverse(torch.autograd.Function): - """Layer that reverses and scales gradients before - passing them up to earlier ops in the computation graph - during backpropogation. - """ - - @staticmethod - def forward(ctx, x, weight): - """ - Perform a no-op forward pass that stores a weight for later - gradient scaling during backprop. 
- - Parameters - ---------- - x : torch.FloatTensor - [Batch, Features] - weight : float - weight for scaling gradients during backpropogation. - stored in the "context" ctx variable. - - Notes - ----- - We subclass `Function` and use only @staticmethod as specified - in the newstyle pytorch autograd functions. - https://pytorch.org/docs/stable/autograd.html#torch.autograd.Function - - We define a "context" ctx of the class that will hold any values - passed during forward for use in the backward pass. - - `x.view_as(x)` and `*1` are necessary so that `GradReverse` - is actually called - `torch.autograd` tries to optimize backprop and - excludes no-ops, so we have to trick it :) - """ - # store the weight we'll use in backward in the context - ctx.weight = weight - return x.view_as(x) * 1.0 - - @staticmethod - def backward(ctx, grad_output): - """Return gradients - - Returns - ------- - rev_grad : torch.FloatTensor - reversed gradients scaled by `weight` passed in `.forward()` - None : None - a dummy "gradient" required since we passed a weight float - in `.forward()`. - """ - # here scale the gradient and multiply by -1 - # to reverse the gradients - return (grad_output * -1 * ctx.weight), None - - -class DANN(nn.Module): - """Build a domain adaptation neural network""" - - def __init__( - self, - model: CellTypeCLF, - n_domains: int = 2, - weight: float = 1.0, - n_layers: int = 1, - ) -> None: - """Build a domain adaptation neural network using - the embedding of a provided model. - - Parameters - ---------- - model : CellTypeCLF - cell type classification model. - n_domains : int - number of domains to adapt. - weight : float - weight for reversed gradients. - n_layers : int - number of hidden layers in the network. - - Returns - ------- - None. - """ - super(DANN, self).__init__() - - self.model = model - self.n_domains = n_domains - - self.embed = model.embed - - hidden_layers = [ - nn.Linear(self.model.n_hidden, self.model.n_hidden), - nn.ReLU(), - ] * n_layers - - self.domain_clf = nn.Sequential( - *hidden_layers, - nn.Linear(self.model.n_hidden, self.n_domains), - ) - return - - def set_rev_grad_weight( - self, - weight: float, - ) -> None: - """Set the weight term used after reversing gradients""" - self.weight = weight - return - - def forward( - self, - x: torch.FloatTensor, - ) -> Tuple[torch.FloatTensor, torch.FloatTensor]: - """Perform a forward pass. - - Parameters - ---------- - x : torch.FloatTensor - [Batch, Features] input. - - Returns - ------- - domain_pred : torch.FloatTensor - [Batch, n_domains] logits. - x_embed : torch.FloatTensor - [Batch, n_hidden] - """ - # get the model embedding - x_embed = self.embed(x) - # reverse gradients and scale by a weight - # domain_pred -> x_rev -> GradReverse -> x_embed - # d+ -> d+ -> d- -> d- - x_rev = GradReverse.apply( - x_embed, - self.weight, - ) - # classify the domains - domain_pred = self.domain_clf(x_rev) - return domain_pred, x_embed - - -class AE(nn.Module): - """Build an autoencoder that shares the classifier embedding. - - Attributes - ---------- - model : CellTypeCLF - cell type classification model. - n_layers : int - number of hidden layers in the network. - n_hidden : int - number of hidden units in each hidden layer. - defaults to the hidden layer size of the model. - dispersion : torch.nn.Parameter - [model.n_genes,] dispersion parameters for each gene. - `None` unless `model.use_raw_counts`. - latent_libsize : bool - use a latent variable to store library size. 
if `False`, - uses the observed library size to scale abundance profiles. - """ - - noise_scale = 1.0 - - def __init__( - self, - model: CellTypeCLF, - n_layers: int = 2, - n_hidden: int = None, - n_domains: int = None, - latent_libsize: bool = False, - ) -> None: - """Build an autoencoder using the embedding of a provided model. - - Parameters - ---------- - model : CellTypeCLF - cell type classification model. - n_layers : int - number of hidden layers in the network. - n_hidden : int - number of hidden units in each hidden layer. - defaults to the hidden layer size of the model. - n_domains : int - number of domain covariates to include. - latent_libsize : bool - use a latent variable to store library size. if `False`, - uses the observed library size to scale abundance profiles. - - Returns - ------- - None. - - Notes - ----- - Maps gene expression vectors to an embedding using the same - trunk as the classification model. If `model.use_raw_counts`, - reconstructs library depth using the latent library size and - also learns a set of dispersion parameters for each gene. - Reconstructs profiles using a decoder model that mirrors the - classification embedding trunk. - """ - super(AE, self).__init__() - - self.model = model - self.n_hidden = self.model.n_hidden if n_hidden is None else n_hidden - self.latent_libsize = latent_libsize - self.n_domains = n_domains if n_domains is not None else 0 - - # extract the embedder from the classification model - self.embed = self.model.embed - - # append decoder layers - dec_input = [ - nn.Linear(self.model.n_hidden + self.n_domains, self.n_hidden), - nn.ReLU(), - ] - - hidden_layers = [ - nn.Linear(self.model.n_hidden, self.n_hidden), - nn.ReLU(), - ] * (n_layers - 1) - - self.decoder = nn.Sequential( - *dec_input, - *hidden_layers, - nn.Linear(self.n_hidden, self.model.n_genes), - ) - - if self.model.use_raw_counts: - # initialize dispersion parameters from a unit Gaussian - self.dispersion = nn.Parameter(torch.randn(self.model.n_genes)) - else: - self.dispersion = torch.ones((1,)) - - # encode log(library_size) as a latent variable - self.libenc = nn.Sequential( - nn.Linear(self.model.n_genes + self.n_domains, 1), - nn.ReLU(), - ) - - return - - def noise( - self, - x_embed: torch.FloatTensor, - ) -> torch.FloatTensor: - """Add white noise to the latent embedding""" - eps = torch.randn_like(x_embed) * self.noise_scale - return torch.nn.functional.relu(x_embed + eps) - - def forward( - self, - x: torch.FloatTensor, - x_embed: torch.FloatTensor = None, - x_domain: torch.FloatTensor = None, - ) -> Tuple[torch.FloatTensor, torch.FloatTensor]: - """Perform a forward pass. - - Parameters - ---------- - x : torch.FloatTensor - [Batch, Features] input. - x_embed : torch.FloatTensor, optional. - [Batch, n_hidden] embedding. - x_domain : torch.FloatTensor, optional. - [Batch, Domains] one-hot labels. - used for conditional decoding. - - Returns - ------- - reconstructed_profiles : torch.FloatTensor - [Batch, Features] abundance profiles [0, 1]. - scaled_profiles : torch.FloatTensor - [Batch, Features] profiles scaled by latent depths. - dispersion : torch.FloatTensor - [Features,] dispersion parameters for each gene. 
- x_embed : torch.FloatTensor - [Batch, n_hidden] - """ - # get the model embedding, avoid recomputing if a precomputed - # embedding is passed in - x_embed = self.embed(x) if x_embed is None else x_embed - - if self.training: - x_embed = self.noise(x_embed) - - # check the dimensions are sane - if x_embed.size(-1) > 2048: - logger.warn( - f"AE `x_embed` dimension is larger than expected: {x_embed.size(1)}" - ) - - # add domain covariates if provided and initialized to use covars - if x_domain is None and self.n_domains > 0: - msg = "Must provide domain covariates for a conditional model. Received `None`." - raise TypeError(msg) - if x_domain is not None and self.n_domains > 0: - logger.debug(f"Domain covariates added. Size {x_domain.size()}.") - x_embed = torch.cat([x_embed, x_domain], dim=-1) - x2libsz = torch.cat([x, x_domain], dim=-1) - else: - x2libsz = x - - # reconstruct gene expression abundance profiles, first with raw - # activations - x_rec = self.decoder(x_embed) - # use softmax to go from logits to relative abundance profiles - x_rec = nn.functional.softmax(x_rec, dim=1) - - if self.latent_libsize: - # `libenc` returns the log of the library size - lib_size = self.libenc(x2libsz) - lib_size = torch.clamp(lib_size, max=12) # numerical stability - else: - lib_size = torch.log(x.sum(1)).view(-1, 1) # [Cells, 1] - x_scaled = x_rec * torch.exp(lib_size) - - return x_rec, x_scaled, torch.exp(self.dispersion), x_embed diff --git a/build/lib/scnym/predict.py b/build/lib/scnym/predict.py deleted file mode 100644 index 8b84b42..0000000 --- a/build/lib/scnym/predict.py +++ /dev/null @@ -1,216 +0,0 @@ -import numpy as np -from scipy import sparse -import os -import torch -import torch.nn.functional as F -from typing import Union -from .model import CellTypeCLF -from .dataprep import SingleCellDS -import tqdm - - -class Predicter(object): - """Predict cell types from expression data using `CellTypeCLF`. - - Attributes - ---------- - model_weights : list - paths to model weights for classification. - labels : list - str labels for output classes. - n_cell_types : int - number of output classes. - n_genes : int - number of input genes. - models : list - `nn.Module` for each set of weights in `.model_weights`. - """ - - def __init__( - self, - model_weights: Union[str, list, tuple], - n_genes: int = None, - n_cell_types: int = None, - labels: list = None, - **kwargs, - ) -> None: - """ - Predict cell types using pretrained weights for `CellTypeCLF`. - - Parameters - ---------- - model_weights : str, list, tuple - paths to pre-trained model weights. if more than one - path to weights is provided, predicts using an ensemble - of models. - n_genes : int - number of genes in the input frame. - n_cell_types : int - number of cell types in the output. - labels : list - string labels corresponding to each cell type output - **kwargs passed to `model.CellTypeCLF` - """ - if type(model_weights) == str: - self.model_weights = [model_weights] - else: - self.model_weights = model_weights - self.labels = labels - - if n_cell_types is None: - # get the number of output nodes from the pretrained model - print( - "Assuming `n_cell_types` is the same as in the \ - pretrained model weights." 
- ) - params = torch.load(self.model_weights[0], map_location="cpu") - fkey = list(params.keys())[-1] - self.n_cell_types = len(params[fkey]) - else: - self.n_cell_types = n_cell_types - - # check that all the specified weights exist - for weights in self.model_weights: - if not os.path.exists(weights): - raise FileNotFoundError() - - if n_genes is None: - # get the number of input genes from the model weights - print( - "Assuming `n_genes` is the same as in the \ - pretrained model weights." - ) - params = torch.load(model_weights, map_location="cpu") - fkey = list(params.keys())[0] - self.n_genes = params[fkey].shape[1] - else: - self.n_genes = n_genes - - # Load each set of weights in `model_weights` into a model - # to use in an ensemble prediction. - self.models = [] - for weights in self.model_weights: - model = CellTypeCLF( - n_genes=self.n_genes, - n_cell_types=self.n_cell_types, - **kwargs, - ) - model.load_state_dict(torch.load(weights, map_location="cpu")) - - if torch.cuda.is_available(): - model = model.cuda() - - self.models.append(model.eval()) - - return - - def predict( - self, - X: Union[np.ndarray, sparse.csr.csr_matrix, torch.FloatTensor], - output: str = None, - batch_size: int = 1024, - **kwargs, - ) -> (np.ndarray, list): - """ - Predict cell types given a matrix `X`. - - Parameters - ---------- - X : np.ndarray, sparse.csr.csr_matrix, torch.FloatTensor - [Cells, Genes] - output : str - additional output to include as an optional third tuple. - ('prob', 'score'). - batch_size : int - batch size to use for predictions. - - Returns - ------- - predictions : np.ndarray - [Cells,] ints of predicted class - names : list - [Cells,] str of predicted class names - probabilities : np.ndarray - [Cells, Types] probabilities (softmax outputs). - - Notes - ----- - acceptable **kwarg for legacy compatibility -- - return_prob : bool - return probabilities as an optional third output. - """ - if not X.shape[1] == self.n_genes: - gs = (X.shape[1], self.n_genes) - raise ValueError("%d genes in X, %d genes in model." % gs) - - if "return_prob" in kwargs: - return_prob = kwargs["return_prob"] - else: - return_prob = None - - if output not in ["prob", "score"] and output is not None: - msg = f"{output} is not a valid additional output." - raise ValueError(msg) - - # build a SingleCellDS so we can load cells onto the - # GPU in batches - ds = SingleCellDS(X=X, y=np.zeros(X.shape[0])) - dl = torch.utils.data.DataLoader( - ds, - batch_size=batch_size, - ) - - # For each cell vector, compute a prediction - # and a class probability vector. 
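# Ensemble note: each model in `self.models` scores the batch; the raw logits
# are stacked and averaged across models before the argmax and softmax below,
# so the reported predictions and probabilities reflect the mean of all
# provided weight sets.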
- predictions = [] - scores = [] - probabilities = [] - - # For each cell, compute predictions - for data in tqdm.tqdm(dl, desc="Finding cell types"): - - X_batch = data["input"] - - if torch.cuda.is_available(): - X_batch = X_batch.cuda() - - # take an average prediction across all models provided - outs = [] - for model in self.models: - out = model(X_batch) - outs.append(out) - outs = torch.stack(outs, dim=0) - out = torch.mean(outs, dim=0) - - # save most likely prediction and output probabilities - scores.append(out.detach().cpu().numpy()) - - _, pred = torch.max(out, 1) - predictions.append(pred.detach().cpu().numpy()) - - probs = F.softmax(out, dim=1) - probabilities.append(probs.detach().cpu().numpy()) - - predictions = np.concatenate(predictions, axis=0) # [Cells,] - scores = np.concatenate(scores, axis=0) # [Cells, Types] - probabilities = np.concatenate(probabilities, axis=0) # [Cells, Types] - - if self.labels is not None: - names = [] - for i in range(len(predictions)): - names += [self.labels[predictions[i]]] - else: - names = None - - # Parse the arguments to determine what to return - # N.B. that `return_prob` here is to support legacy code - # and may be removed in the future. - if return_prob is True: - return predictions, names, probabilities - elif output is not None: - if output == "prob": - return predictions, names, probabilities - elif output == "score": - return predictions, names, scores - else: - return predictions, names diff --git a/build/lib/scnym/scnym_ad.py b/build/lib/scnym/scnym_ad.py deleted file mode 100644 index 04a4f42..0000000 --- a/build/lib/scnym/scnym_ad.py +++ /dev/null @@ -1,217 +0,0 @@ -"""scNym model training from standard anndata objects""" -import anndata -import os -import os.path as osp -import uuid -import configargparse -import numpy as np -import pandas as pd - -from .main import train_cv, train_all -from .model import CellTypeCLF -from .utils import build_classification_matrix -from sklearn.model_selection import StratifiedKFold - - -def make_parser(): - parser = configargparse.ArgParser( - description="train an scNym cell type classification model." - ) - parser.add_argument( - "--config", - type=str, - is_config_file=True, - required=False, - help="path to a configuration file.", - ) - parser.add_argument( - "--data", type=str, help="path to an h5ad [Cells, Features] object." 
- ) - parser.add_argument( - "--groupby", - type=str, - help="categorical feature in `adata.obs` to use for classifier training.", - ) - parser.add_argument("--out_path", type=str, help="path for outputs.") - parser.add_argument( - "--batch_size", - type=int, - default=256, - help="batch size for training", - ) - parser.add_argument( - "--n_epochs", - type=int, - default=200, - help="number of epochs for training", - ) - parser.add_argument( - "--init_dropout", - type=float, - default=0.0, - help="initial dropout to perform on gene inputs", - ) - parser.add_argument( - "--n_hidden", - type=int, - default=128, - help="number of hidden units in the classifier", - ) - parser.add_argument( - "--n_layers", - type=int, - default=2, - help="number of hidden layers in the model", - ) - parser.add_argument( - "--residual", - action="store_true", - help="use residual layers in the model", - ) - parser.add_argument( - "--weight_decay", - type=float, - default=1e-5, - help="weight decay applied by the optimizer", - ) - parser.add_argument( - "--weight_classes", - type=bool, - default=True, - help="weight loss based on relative class abundance.", - ) - parser.add_argument( - "--mixup_alpha", - type=float, - default=None, - help="alpha parameter for MixUp training. if set performs MixUp, otherwise does not.", - ) - parser.add_argument( - "--unlabeled_counts", - type=str, - default=None, - help="path to h5ad [Cells, Features] object of unlabeled data.", - ) - parser.add_argument( - "--unsup_max_weight", - type=float, - default=2.0, - help="maximum weight for the unsupervised component of IC training.", - ) - parser.add_argument( - "--unsup_mean_teacher", - type=bool, - default=True, - help="use a mean teacher for IC training.", - ) - parser.add_argument( - "--cross_val_train", - action="store_true", - ) - return parser - - -def main(): - parser = make_parser() - args = parser.parse_args() - - adata = anndata.read_h5ad(args.data) - print(f"{adata.shape[0]} cells, {adata.shape[1]} genes in the training data.") - - if args.groupby not in adata.obs: - msg = f"{args.groupby} not in `adata.obs`" - raise ValueError(msg) - - os.makedirs(args.out_path, exist_ok=True) - - if args.unlabeled_counts is None: - unlabeled_counts = None - else: - # load unlabeled counts and build a matrix that follows - # gene dimension ordering of the training data - unlabeled_adata = anndata.read_h5ad(args.unlabeled_counts) - unlabeled_counts = build_classification_matrix( - X=unlabeled_adata.X - if type(unlabeled_adata.X) == np.ndarray - else unlabeled_adata.X.toarray(), - model_genes=np.array(adata.var_names), - sample_genes=np.array(unlabeled_adata.var_names), - ) - - X = adata.X if type(adata.X) == np.ndarray else adata.X.toarray() - y = pd.Categorical(adata.obs[args.groupby]).codes - - model_params = { - "n_hidden": args.n_hidden, - "residual": args.residual, - "n_layers": args.n_layers, - "init_dropout": args.init_dropout, - } - - if args.cross_val_train: - kf = StratifiedKFold(n_splits=5, shuffle=True) - fold_indices = list(kf.split(X, y)) - - fold_eval_acc, fold_eval_losses = train_cv( - X=X, - y=y, - batch_size=args.batch_size, - n_epochs=args.n_epochs, - weight_decay=args.weight_decay, - ModelClass=CellTypeCLF, - fold_indices=fold_indices, - out_path=args.out_path, - n_genes=adata.shape[1], - mixup_alpha=args.mixup_alpha, - unlabeled_counts=unlabeled_counts, - unsup_max_weight=args.unsup_max_weight, - unsup_mean_teacher=args.unsup_mean_teacher, - weighted_classes=args.weight_classes, - **model_params, - ) - np.savetxt( - 
osp.join( - args.out_path, - "fold_eval_losses.csv", - ), - fold_eval_losses, - ) - np.savetxt( - osp.join( - args.out_path, - "fold_eval_acc.csv", - ), - fold_eval_acc, - ) - - val_loss, val_acc = train_all( - X=X, - y=y, - batch_size=args.batch_size, - n_epochs=args.n_epochs, - weight_decay=args.weight_decay, - ModelClass=CellTypeCLF, - out_path=args.out_path, - n_genes=adata.shape[1], - mixup_alpha=args.mixup_alpha, - unlabeled_counts=unlabeled_counts, - unsup_max_weight=args.unsup_max_weight, - unsup_mean_teacher=args.unsup_mean_teacher, - weighted_classes=args.weight_classes, - **model_params, - ) - print(f"Final validation loss: {val_loss:08}") - print(f"Final validation acc : {val_acc:08}") - - # get exp id - exp_id = uuid.uuid4() - res = pd.DataFrame( - {"val_acc": val_acc, "val_loss": val_loss}, - index=[exp_id], - ).to_csv( - osp.join( - args.out_path, - "all_data_val_results.csv", - ) - ) - return diff --git a/build/lib/scnym/trainer.py b/build/lib/scnym/trainer.py deleted file mode 100644 index c3c8522..0000000 --- a/build/lib/scnym/trainer.py +++ /dev/null @@ -1,1412 +0,0 @@ -import numpy as np -import os -import os.path as osp -import torch -import torch.nn as nn -import torch.nn.functional as F -import json -import logging -from typing import Callable, Iterable, Union, List -from .dataprep import SampleMixUp -from .utils import compute_entropy_of_mixing -from .model import CellTypeCLF, DANN -import copy -from torch.utils.tensorboard import SummaryWriter - -from .dataprep import SampleMixUp -from .utils import compute_entropy_of_mixing -from .model import CellTypeCLF, DANN, AE -from .losses import * - - -logger = logging.getLogger(__name__) - - -class Trainer(object): - """ - Trains a PyTorch model. - - Attributes - ---------- - model : nn.Module - model with required `.forward(...)` method. - criterion : Callable - loss criterion to optimize. - optimizer : torch.optim.Optimizer - optimizer for the model parameters. - dataloaders : dict - keyed by ['train', 'val'] with values corresponding - to `torch.utils.data.DataLoader` for training - and validation sets. - out_path : str - output path for best model. - n_epochs : int - number of epochs for training. - min_epochs : int - minimum number of epochs before saving weights. - patience : int - maximum number of epochs to wait before early stopping. - if `None`, infinite patience is used (up to `n_epochs`). - waiting_time : int - number of epochs since the last best val loss. - reg_criterion : Callable - criterion to penalize layer weights. - use_gpu : bool - use CUDA acceleration. - verbose : bool - write all batch losses to stdout. - save_freq : int - Number of epochs between model checkpoints. Default = 10. - scheduler : learning rate scheduler. - """ - - def __init__( - self, - model: nn.Module, - criterion: Callable, - optimizer: torch.optim.Optimizer, - dataloaders: dict, - out_path: str, - batch_transformers: dict = {}, - n_epochs: int = 50, - min_epochs: int = 0, - patience: int = None, - exp_name: str = "", - reg_criterion: Callable = None, - use_gpu: bool = torch.cuda.is_available(), - verbose: bool = False, - save_freq: int = 10, - scheduler: torch.optim.lr_scheduler = None, - tb_writer: str = None, - ) -> None: - """ - Trains a PyTorch `nn.Module` object provided in `model` - on training and testing sets provided in `dataloaders` - using `criterion` and `optimizer`. - - Saves model weight snapshots every `save_freq` epochs and saves the - weights with the best testing loss at the end of training. 
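A minimal wiring sketch, assuming dataloaders that yield dict minibatches
with "input" and one-hot "output" entries; the loss callable, loaders, and
paths here are placeholders, not the canonical scNym configuration.

>>> model = CellTypeCLF(n_genes=1000, n_cell_types=10)
>>> opt = torch.optim.Adam(model.parameters(), lr=1e-3)
>>> trainer = Trainer(
...     model=model,
...     criterion=torch.nn.functional.binary_cross_entropy_with_logits,  # placeholder loss
...     optimizer=opt,
...     dataloaders={"train": train_dl, "val": val_dl},  # placeholder loaders
...     out_path="./scnym_out",
...     n_epochs=10,
... )
>>> best_model = trainer.train()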
- - Parameters - ---------- - model : nn.Module - model with required `.forward(...)` method. - criterion : Callable - loss criterion to optimize. - optimizer : torch.optim.Optimizer - optimizer for the model parameters. - dataloaders : dict - keyed by ['train', 'val'] with values corresponding - to `torch.utils.data.DataLoader` for training - and validation sets. - out_path : str - output path for best model. - batch_transformers : dict - apply transforms to minibatch inputs and targets. - keys are ['train', 'val'], values are Callable. - n_epochs : int - number of epochs for training. - min_epochs : int - minimum number of epochs before saving weights. - patience : int - maximum number of epochs to wait before early stopping. - if `None`, infinite patience is used (up to `n_epochs`). - reg_criterion : callable - criterion to penalize layer weights. - use_gpu : bool - use CUDA acceleration. - verbose : bool - write all batch losses to stdout. - save_freq : int - Number of epochs between model checkpoints. Default = 10. - scheduler : torch.optim.lr_scheduler - learning rate schedule. - - Returns - ------- - None. - """ - self.model = model - self.optimizer = optimizer - self.criterion = criterion - self.n_epochs = n_epochs - self.min_epochs = min_epochs - self.patience = patience if patience is not None else n_epochs - self.waiting_time = 0 - self.dataloaders = dataloaders - self.batch_transformers = batch_transformers - self.out_path = out_path - self.use_gpu = use_gpu - self.verbose = verbose - self.save_freq = save_freq - self.best_acc = 0.0 - self.best_loss = 1.0e10 - self.scheduler = scheduler - self.reg_criterion = reg_criterion - if tb_writer is not None: - self.tb_writer = SummaryWriter(log_dir=tb_writer) - os.makedirs(tb_writer, exist_ok=True) - else: - self.tb_writer = None - - if not os.path.exists(self.out_path): - os.mkdir(self.out_path) - # initialize log - - self.log_path = os.path.join(self.out_path, "_".join([exp_name, "log.csv"])) - with open(self.log_path, "w") as f: - header = "Epoch,Running_Loss,Mode\n" - f.write(header) - - self.parameters = { - "out_path": out_path, - "exp_name": exp_name, - "n_epochs": n_epochs, - "use_cuda": self.use_gpu, - "train_batch_size": self.dataloaders["train"].batch_size, - "val_batch_size": self.dataloaders["val"].batch_size, - "train_batch_sampler": str(type(self.dataloaders["train"].sampler)), - "val_batch_sampler": str(type(self.dataloaders["val"].sampler)), - "optimizer_type": str(type(self.optimizer)), - "learning_rate": self.optimizer.param_groups[0]["lr"], - "model_hidden": self.model.n_hidden, - "model_ngenes": self.model.n_genes, - "model_ncelltypes": self.model.n_cell_types, - } - - # write the log file header - with open(self.log_path, "w") as f: - header = "Epoch,Iter,Running_Loss,Mode\n" - f.write(header) - - def train_epoch(self): - """Perform training across one full iteration through - the data. 
- """ - self.model.train(True) - i = 0 - running_loss = 0.0 - running_corrects = 0.0 - running_total = 0.0 - - btrans = self.batch_transformers.get("train", None) - for data in self.dataloaders["train"]: - # if a batch transformer is present, - # transform the data before use - if btrans is not None: - data = btrans(data) - - inputs = data["input"] - labels = data["output"] # one-hot - - if self.use_gpu: - inputs = inputs.cuda() - labels = labels.cuda() - else: - pass - inputs.requires_grad_() - labels.requires_grad = False - - # zero gradients - self.optimizer.zero_grad() - - # forward pass - outputs = self.model(inputs) - # predictions are the output nodes with - # the highest values - _, predictions = torch.max(outputs, 1) - - # remake an integer version of the labels for quick checking - int_labels = torch.argmax(labels, 1) - - correct = torch.sum(predictions.detach() == int_labels.detach()) - - # compute loss - if self.reg_criterion is not None: - reg_loss = self.reg_criterion(self.model) - loss = self.criterion(outputs, labels) + reg_loss - else: - loss = self.criterion(outputs, labels) - - if self.verbose: - print("batch loss: ", loss.item()) - if np.isnan(loss.data.cpu().numpy()): - raise RuntimeError("NaN loss encountered in training") - - # compute gradients in a backward pass, update parameters - loss.backward() - self.optimizer.step() - - # statistics update - running_loss += loss.item() / inputs.size(0) - running_corrects += float(correct.item()) - running_total += float(labels.size(0)) - - if i % 100 == 0 and self.verbose: - print("Iter : ", i) - print("running_loss : ", running_loss / (i + 1)) - print("running_acc : ", running_corrects / running_total) - print("corrects: %f | total: %f" % (running_corrects, running_total)) - # append to log - with open(self.log_path, "a") as f: - f.write( - str(self.epoch) - + "," - + str(i) - + "," - + str(running_loss / (i + 1)) - + ",train\n" - ) - i += 1 - - epoch_loss = running_loss / len(self.dataloaders["train"]) - epoch_acc = running_corrects / running_total - - # append to log - with open(self.log_path, "a") as f: - f.write( - str(self.epoch) - + "," - + str(i) - + "," - + str(running_loss / (i + 1)) - + ",train_epoch\n" - ) - - if self.tb_writer is not None: - self.tb_writer.add_scalar("Loss/train", epoch_loss, self.epoch) - self.tb_writer.add_scalar("Acc/train", epoch_acc, self.epoch) - for i, p in enumerate(self.model.parameters()): - self.tb_writer.add_histogram( - f"Grad/param{i:04}", - p.grad, - self.epoch, - ) - - self.tb_writer.add_scalar( - "lr/lr", - self.optimizer.state_dict()["param_groups"][0]["lr"], - self.epoch, - ) - - if self.verbose: - print("{} Loss : {:.4f}".format("train", epoch_loss)) - print("{} Acc : {:.4f}".format("train", epoch_acc)) - print( - "TRAIN EPOCH corrects: %f | total: %f" - % (running_corrects, running_total) - ) - - @torch.no_grad() - def val_epoch(self): - """Perform a pass through the validation data. - Do not record gradients to speed things up. 
- """ - self.model.train(False) - i = 0 - running_loss = 0.0 - running_corrects = 0 - running_total = 0 - - btrans = self.batch_transformers.get("val", None) - for data in self.dataloaders["val"]: - # if a batch transformer is present, - # transform the data before use - if btrans is not None: - data = btrans(data) - - inputs = data["input"] - labels = data["output"] # one-hot - if self.use_gpu: - inputs = inputs.cuda() - labels = labels.cuda() - else: - pass - - # zero gradients - self.optimizer.zero_grad() - # forward pass - outputs = self.model(inputs) - _, predictions = torch.max(outputs, 1) - - # remake an integer version of the labels for quick checking - int_labels = torch.argmax(labels, 1) - correct = torch.sum(predictions.detach() == int_labels.detach()) - if self.verbose > 1: - print("PRED\n", predictions[:10, ...]) - print("LABEL\n", int_labels[:10, ...]) - print("CORRECT: ", correct) - - if self.reg_criterion is not None: - reg_loss = self.reg_criterion(self.model) - loss = self.criterion(outputs, labels) + reg_loss - else: - loss = self.criterion(outputs, labels) - - # statistics update - running_loss += loss.item() / inputs.size(0) - running_corrects += int(correct.item()) - running_total += int(labels.size(0)) - - if i % 1 == 10 and self.verbose > 1: - print("Iter : ", i) - print("running_loss : ", running_loss / (i + 1)) - print("running_acc : ", running_corrects / running_total) - print("corrects: %f | total: %f" % (running_corrects, running_total)) - # append to log - with open(self.log_path, "a") as f: - f.write( - str(self.epoch) - + "," - + str(i) - + "," - + str(running_loss / (i + 1)) - + ",val\n" - ) - i += 1 - - epoch_loss = running_loss / len(self.dataloaders["val"]) - epoch_acc = running_corrects / running_total - # append to log - with open(self.log_path, "a") as f: - f.write( - str(self.epoch) - + "," - + str(i) - + "," - + str(running_loss / (i + 1)) - + ",val_epoch\n" - ) - - # add one epoch to the waiting time for best loss - # if we had a new best loss, the counter is reset below - self.waiting_time += 1 - if (epoch_loss < self.best_loss) and (self.epoch >= self.min_epochs): - self.best_loss = epoch_loss - self.best_model_wts = self.model.state_dict() - self.waiting_time = 0 - torch.save( - self.model.state_dict(), - os.path.join( - self.out_path, - ("model_weights_" + str(self.epoch).zfill(3) + ".pkl"), - ), - ) - print("Saving best model weights...") - torch.save( - self.model.state_dict(), - os.path.join(self.out_path, "00_best_model_weights.pkl"), - ) - print("Saved best weights.") - - if hasattr(self, "dan_criterion"): - print("Trainer has a `dan_criterion`.") - if self.dan_criterion is not None: - print("Saving DAN weights...") - torch.save( - self.dan_criterion.dann.state_dict(), - os.path.join( - self.out_path, - "02_best_dan_weights.pkl", - ), - ) - - with open(self.log_path, "a") as f: - f.write( - str(self.epoch) - + "," - + str(i) - + "," - + str(running_loss / (i + 1)) - + ",best_model_weights\n", - ) - - if self.tb_writer is not None: - self.tb_writer.add_text( - "BestWeights", - f"Saved best weights at {self.epoch}, loss {epoch_loss}", - self.epoch, - ) - self.tb_writer.flush() - - elif self.epoch % self.save_freq == 0: - torch.save( - self.model.state_dict(), - os.path.join( - self.out_path, - "model_weights_" + str(self.epoch).zfill(3) + ".pkl", - ), - ) - - elif self.epoch == (self.n_epochs - 1): - torch.save( - self.model.state_dict(), - os.path.join(self.out_path, "01_final_model_weights.pkl"), - ) - if self.verbose: - 
print(f"{self.waiting_time} epochs since last best weights.\n") - - if self.tb_writer is not None: - self.tb_writer.add_scalar("Loss/val", epoch_loss, self.epoch) - self.tb_writer.add_scalar("Acc/val", epoch_acc, self.epoch) - self.tb_writer.flush() - - if self.verbose: - print("{} Loss : {:.4f}".format("val", epoch_loss)) - print("{} Acc : {:.4f}".format("val", epoch_acc)) - print( - "VAL EPOCH corrects: %f | total: %f" % (running_corrects, running_total) - ) - - def train(self): - for epoch in range(self.n_epochs): - self.epoch = epoch - msg = f"Epoch {epoch}/{self.n_epochs-1}" - p_complete = epoch / self.n_epochs - n_bars = int(np.floor(30 * p_complete)) - msg += "|" + "-" * n_bars + "_" * (30 - n_bars) + "|" - # print a new line so the progress bar isn't overwritten - # on the final stdout - end_char = "\n" if epoch == (self.n_epochs - 1) else "\r" - print(msg, end=end_char) - - # training epoch - self.train_epoch() - # evaluate model - self.val_epoch() - - # update learning rate - # NOTE: change in `torch>=1.1.0`, `scheduler.step()` - # is now called AFTER `optimizer.step()` - if self.scheduler is not None: - self.scheduler.step() - - if self.waiting_time > self.patience: - # we have waited a sufficient number of epochs - # to perform early stopping - logger.info(">" * 5) - logger.info(f"Early stopping at epoch {self.epoch}") - logger.info(">" * 5) - break - - self.model.load_state_dict( - torch.load( - os.path.join( - self.out_path, - "00_best_model_weights.pkl", - ) - ) - ) - - if self.tb_writer is not None: - # close tensorboard writer - self.tb_writer.flush() - self.tb_writer.close() - - return self.model - - -class SemiSupervisedTrainer(Trainer): - def __init__( - self, - unsup_criterion: Callable, - unsup_dataloader: torch.utils.data.DataLoader, - unsup_weight: Callable, - dan_criterion: Callable = None, - dan_weight: Callable = None, - **kwargs, - ) -> None: - """Train a PyTorch model using both a supervised and - unsupervised loss as described for Interpolation - Consistency Training. - - Parameters - ---------- - unsup_criterion : Callable - loss function for unlabeled samples. - takes both the current `nn.Module` model and a `torch.FloatTensor` - of unlabeled samples as input. - unsup_dataloader : torch.utils.data.DataLoader - data loader supplying unlabeled samples. - unsup_weight : Callable - takes an int epoch as input and returns a weight coefficient - to scale the importance of the unsupervised loss. - dan_criterion : Callable, optional - domain adaptation loss. takes in a model, labeled batch, and - unlabeled batch, and returns a `torch.Tensor` loss value. - dan_weight : Callable, optional - domain adaptation loss weight schedule. - takes an int epoch as input and returns a weight coefficient. - - Returns - ------- - None. - """ - super(SemiSupervisedTrainer, self).__init__(**kwargs) - self.unsup_criterion = unsup_criterion - self.unsup_dataloader = unsup_dataloader - self.unsup_weight = unsup_weight - self.dan_criterion = dan_criterion - if self.dan_criterion is not None: - print("Using a Domain Adaptation Loss.") - self.dan_weight = dan_weight - return - - def train_epoch( - self, - ) -> None: - """ - Perform training using both a supervised and semi-supervised loss. - - Notes - ----- - (1) Sample labeled examples, compute the standard supervised loss. - (2) Sample unlabeled examples, compute unsupervised loss. - (3) Perform backward pass and update parameters. 
- """ - self.model.train(True) - i = 0 - running_loss = 0.0 - running_sup_loss = 0.0 # supervised loss - running_uns_loss = 0.0 # unsupervised loss - running_dom_loss = 0.0 # domain adaptation loss - running_corrects = 0.0 - running_total = 0.0 - - btrans = self.batch_transformers.get("train", None) - - iter_unsup_dl = iter(self.unsup_dataloader) - for data in self.dataloaders["train"]: - - #################################### - # (1) Prepare data and graph - #################################### - - # get unlabeled batch - unsup_data = next(iter_unsup_dl) - - if btrans is not None: - data = btrans(data) - - if self.use_gpu: - # push all the data to the CUDA device - data["input"] = data["input"].cuda() - data["output"] = data["output"].cuda() - - unsup_data["input"] = unsup_data["input"].cuda() - - # capture gradients on labeled and unlabeled inputs - # do not store gradients on labels - data["input"].requires_grad = True - data["output"].requires_grad = False - - unsup_data["input"].requires_grad = True - - # zero gradients across the graph - self.optimizer.zero_grad() - - #################################### - # (2) Compute loss terms - #################################### - - sup_loss, unsup_loss, sup_outputs = self.unsup_criterion( - model=self.model, - labeled_sample=data, - unlabeled_sample=unsup_data, - ) - - # check supervised classification accuracy - _, predictions = torch.max(sup_outputs, 1) - int_labels = torch.argmax(data["output"], 1) - - correct = torch.sum(predictions.detach() == int_labels.detach()) - - # compute regularization loss - if self.reg_criterion is not None: - reg_loss = self.reg_criterion(self.model) - else: - reg_loss = 0.0 - - # compute the domain adaptation loss if desired - if self.dan_criterion is not None: - dan_weight = self.dan_weight(self.epoch) - # NOTE: pseudolabel confidence is only used if `use_conf_pseudolabels` - # was passed to the initiatilization of `DANLoss` - pseudolabel_confidence = self.unsup_criterion.running_confidence_scores[ - -1 - ][0] - dan_loss = self.dan_criterion( - labeled_sample=data, - unlabeled_sample=unsup_data, - weight=dan_weight, - pseudolabel_confidence=pseudolabel_confidence, - ) - else: - dan_loss = torch.zeros( - 1, - ).float() - dan_loss = dan_loss.to(device=sup_loss.device) - dan_weight = 0.0 - - #################################### - # (3) Perform backward pass - #################################### - - loss = ( - sup_loss - + reg_loss - + (self.unsup_weight(self.epoch) * unsup_loss) - + dan_loss - ) - - if self.verbose > 1: - print("sup. loss: ", sup_loss.item()) - print("usup. loss: ", unsup_loss.item()) - print("usup. weight: ", self.unsup_weight(self.epoch)) - if self.dan_criterion is not None: - print("Dom. loss: ", dan_loss.item()) - print("Dom. 
weight: ", dan_weight) - print("total loss: ", loss.item()) - if np.isnan(loss.data.cpu().numpy()): - raise RuntimeError("NaN loss encountered in training") - - # compute gradients in a backward pass, update parameters - loss.backward() - self.optimizer.step() - - # statistics update - labeled_n = data["input"].size(0) - unlabel_n = unsup_data["input"].size(0) - - running_loss += loss.item() - running_sup_loss += sup_loss.item() - running_uns_loss += unsup_loss.item() - running_dom_loss += dan_loss.item() - running_corrects += float(correct.item()) - running_total += float(data["input"].size(0)) - - if i % 100 == 0 and self.verbose: - print("Iter : ", i) - print("running_sup_loss : ", running_sup_loss / (i + 1)) - print("running_uns_loss : ", running_uns_loss / (i + 1)) - print("running_dom_loss : ", running_dom_loss / (i + 1)) - print("running_loss : ", running_loss / (i + 1)) - print("running_acc : ", running_corrects / running_total) - print("corrects: %f | total: %f" % (running_corrects, running_total)) - # append to log - with open(self.log_path, "a") as f: - f.write( - str(self.epoch) - + "," - + str(i) - + "," - + str(running_loss / (i + 1)) - + ",train\n" - ) - i += 1 - - epoch_sup_loss = running_sup_loss / len(self.dataloaders["train"]) - epoch_uns_loss = running_uns_loss / len(self.dataloaders["train"]) - epoch_dom_loss = running_dom_loss / len(self.dataloaders["train"]) - epoch_loss = running_loss / len(self.dataloaders["train"]) - epoch_acc = running_corrects / running_total - - if self.tb_writer is not None: - self.tb_writer.add_scalar( - "Loss/train", - epoch_loss, - self.epoch, - ) - self.tb_writer.add_scalar( - "Acc/train", - epoch_acc, - self.epoch, - ) - self.tb_writer.add_scalar( - "Loss/super", - epoch_sup_loss, - self.epoch, - ) - self.tb_writer.add_scalar( - "Loss/unsup", - epoch_uns_loss, - self.epoch, - ) - self.tb_writer.add_scalar( - "SSL/UnsWeight", - self.unsup_weight(self.epoch), - self.epoch, - ) - if self.dan_criterion is not None: - self.tb_writer.add_scalar( - "Loss/domain", - epoch_dom_loss, - self.epoch, - ) - self.tb_writer.add_scalar( - "SSL/DomWeight", - self.dan_weight(self.epoch), - self.epoch, - ) - - # add embedding - dlabel = self.dan_criterion.dlabel.numpy() - self.tb_writer.add_embedding( - self.dan_criterion.x_embed, - metadata=dlabel.tolist(), - global_step=self.epoch, - tag="Embed/DAN", - ) - - # compute the entropy of mixing - dan_embedding = self.dan_criterion.x_embed.numpy() - - eom = compute_entropy_of_mixing( - X=dan_embedding, - y=dlabel[:, 0], - n_neighbors=100, - n_iters=512, - n_jobs=-1, - ) - self.tb_writer.add_scalar( - "SSL/entropy_of_mixing", - np.mean(eom), - self.epoch, - ) - self.tb_writer.add_histogram( - "SSL/dist_entropy_of_mixing", - eom, - self.epoch, - ) - self.tb_writer.add_scalar( - "SSL/domain_acc", - self.dan_criterion.dan_acc, - self.epoch, - ) - - for i, param in enumerate( - self.dan_criterion.dann.domain_clf.parameters() - ): - self.tb_writer.add_histogram( - f"Grad/domain_clf_{i:04}", - param.grad, - self.epoch, - ) - self.tb_writer.add_scalar( - "SSL/dan_n_conf_pseudolabels", - self.dan_criterion.n_conf_pseudolabels, - self.epoch, - ) - self.tb_writer.add_scalar( - "SSL/dan_p_conf_pseudolabels", - self.dan_criterion.n_conf_pseudolabels - / self.dan_criterion.n_total_unlabeled, - self.epoch, - ) - - self.tb_writer.flush() - - for i, named_mod in enumerate(self.model.classif.named_modules()): - module_name = named_mod[0] - module = named_mod[1] - for j, param in enumerate(module.parameters()): - 
self.tb_writer.add_histogram( - f"Grad/{module_name}/{j:04}", - param.grad, - self.epoch, - ) - - # add the running confidence scores of unlabeled examples - # if we're using MixMatch - if hasattr(self.unsup_criterion, "running_confidence_scores"): - # get the number of confident pseudolabels - # and the total number of pseudolabels per batch - n_conf = torch.Tensor( - [ - torch.sum(s[0]).item() - for s in self.unsup_criterion.running_confidence_scores - ] - ) - n_total = torch.Tensor( - [ - s[0].size(0) - for s in self.unsup_criterion.running_confidence_scores - ] - ) - conf_dist = torch.cat( - [s[1] for s in self.unsup_criterion.running_confidence_scores], - dim=0, - ) - self.tb_writer.add_scalar( - "SSL/p_conf_pseudolabels", - torch.sum(n_conf) / torch.sum(n_total), - self.epoch, - ) - self.tb_writer.add_scalar( - "SSL/avg_pseudolabel_conf", - torch.mean(conf_dist), - self.epoch, - ) - self.tb_writer.add_histogram( - "SSL/dist_p_conf_pseudolabels", - n_conf / n_total, - self.epoch, - ) - self.tb_writer.add_histogram( - "SSL/pseudolabel_conf", - conf_dist, - self.epoch, - ) - - # append to log - with open(self.log_path, "a") as f: - f.write( - str(self.epoch) - + "," - + str(i) - + "," - + str(epoch_loss) - + ",train_epoch\n" - ) - # write out the supervised and unsupervised components - # of loss separately - f.write( - str(self.epoch) - + "," - + str(i) - + "," - + str(epoch_sup_loss) - + ",train_epoch_sup\n" - ) - f.write( - str(self.epoch) - + "," - + str(i) - + "," - + str(epoch_uns_loss) - + ",train_epoch_uns\n" - ) - f.write( - str(self.epoch) - + "," - + str(i) - + "," - + str(self.unsup_weight(self.epoch)) - + ",train_epoch_uns_weight\n" - ) - if self.verbose: - print("{} Sup. Loss : {:.6f}".format("train", epoch_sup_loss)) - print("{} Unsup. Loss : {:.6f}".format("train", epoch_uns_loss)) - print( - "{} Unsup. Weight : {:.6f}".format( - "train", self.unsup_weight(self.epoch) - ) - ) - if self.dan_criterion is not None: - print("{} Dom. Loss : {:.6f}".format("train", epoch_dom_loss)) - print(f"train Dom. Weight : {self.dan_weight(self.epoch)}") - print("{} Loss : {:.4f}".format("train", epoch_loss)) - print("{} Acc : {:.4f}".format("train", epoch_acc)) - print( - "TRAIN EPOCH corrects: %f | total: %f" - % (running_corrects, running_total) - ) - return - - -class MultiTaskTrainer(Trainer): - def __init__( - self, - criteria: List[dict], - unsup_dataloader: torch.utils.data.DataLoader = None, - **kwargs, - ) -> None: - """Train a multitask model with multiple criteria using - labeled and unlabeled dataloaders. - - Parameters - ---------- - criteria : List[dict] - dictionary describing a single task criterion, containing keys. - function - callable with `dict` kwargs `labeled_sample` - and `unlabeled_sample`, `nn.Module` kwarg `model`, - a `float` kwarg `weight`, and returns `torch.FloatTensor`. - weight - Callable, maps `int` epoch to `float` weight. - can also pass float value for constant weight. - validation - bool, use criterion for validation loss. - unsup_dataloader : torch.utils.data.DataLoader - data loader supplying unlabeled samples. - **kwargs : dict - passed to `Trainer` parent. Include: - model - nn.Module - criterion - Callable - optimizer - torch.optim.Optimizer - dataloaders - dict - out_path - str - n_epochs - int - min_epochs - int - patience - int - use_gpu - bool - scheduler - torch.optim.lr_scheduler - - Returns - ------- - None. 
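For example, a criteria list might look like the sketch below; the loss
callables are placeholders with the signature described above, not specific
scNym loss classes.

>>> criteria = [
...     {"name": "supervised", "function": sup_loss_fn, "weight": 1.0, "validation": True},
...     {"name": "ssl", "function": ssl_loss_fn, "weight": ICLWeight(ramp_epochs=20), "validation": False},
... ]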
- - Notes - ----- - criteria are applied sequentially, such that values extracted in one - criterion can be added to the dictionary and used in another. - if a criterion has a `no_weight=True` attribute, loss weights are not - applied in the train loop (useful for DAN, weights applied to rev'd grads). - all criteria should implement a `.train(bool)` method, even if they do not - contain trainable parameters. - """ - kwargs.update({"criterion": None}) - super(MultiTaskTrainer, self).__init__(**kwargs) - - self.criteria = criteria - # check that criteria provided are actually callable - for c in self.criteria: - fxn = c.get("function", None) - weight = c.get("weight", None) - if not callable(fxn): - msg = "One of the criteria provided is not callable.\n" - msg += f"\t{fxn}" - raise ValueError(fxn) - - if not callable(weight) and type(weight) != float: - msg = 'One of the criteria did not include a `"weight"` property.\n' - msg += f"\t{fxn}\n" - msg += f"\tweight : {weight}" - raise ValueError(msg) - - self.unsup_dataloader = unsup_dataloader - self.best_weights = None - return - - def train_epoch( - self, - ) -> float: - """Perform a training loop by evaluating all the criteria - in `self.criteria` sequentially, then computing the weighted - loss and backproping.""" - - self.model.train(True) - - i = 0 - # setup running values for all losses - running_losses = np.zeros(len(self.criteria)) - - btrans = self.batch_transformers.get("train", None) - - if self.unsup_dataloader is not None: - iter_unsup_dl = iter(self.unsup_dataloader) - - for data in self.dataloaders["train"]: - - #################################### - # (1) Prepare data and graph - #################################### - - if btrans is not None: - data = btrans(data) - - if self.use_gpu: - # push all the data to the CUDA device - data["input"] = data["input"].cuda() - data["output"] = data["output"].cuda() - - # get unlabeled batch - if self.unsup_dataloader is not None: - unsup_data = next(iter_unsup_dl) - unsup_data["input"] = unsup_data["input"].to( - device=data["input"].device, - ) - # unsup_data["input"].requires_grad = True - else: - unsup_data = None - - # capture gradients on labeled and unlabeled inputs - # do not store gradients on labels - # data["input"].requires_grad = True - # data["output"].requires_grad = False - - # zero gradients across the graph - self.optimizer.zero_grad() - - #################################### - # (2) Compute loss terms - #################################### - - loss = torch.zeros( - 1, - ).to(device=data["input"].device) - for crit_idx, crit_dict in enumerate(self.criteria): - - crit_fxn = crit_dict["function"] - weight_fxn = crit_dict["weight"] - - crit_name = crit_fxn.__class__.__name__ - crit_name = crit_dict.get("name", crit_name) - logger.debug(f"Computing criterion: {crit_name}") - - # get the current weight from the weight function, - # or use the constant weight value - weight = weight_fxn(self.epoch) if callable(weight_fxn) else weight_fxn - # prepare crit_fxn for loss computation - crit_fxn.train(True) - if hasattr(crit_fxn, "epoch"): - # update the epoch attribute for use by any internal functions - crit_fxn.epoch = self.epoch - - crit_loss = crit_fxn( - labeled_sample=data, - unlabeled_sample=unsup_data, - model=self.model, - weight=weight, - ) - - if hasattr(crit_fxn, "no_weight"): - # don't reweight the loss, already performed - # internally in the criterion - weight = 1.0 - - logger.debug(f"crit_loss: {crit_loss}") - logger.debug(f"weight: {weight}") - - # weight losses and 
accumulate - weighted_crit_loss = crit_loss * weight - logger.debug(f"weighted_crit_loss: {weighted_crit_loss}") - logger.debug(f"loss: {loss}, type {type(loss)}") - - loss += weighted_crit_loss - - running_losses[crit_idx] += crit_loss.item() - if self.verbose: - logger.debug(f"weight {crit_name} : {weight}") - logger.debug(f"batch {crit_name} : {weighted_crit_loss}") - - # backprop - loss.backward() - # update parameters - self.optimizer.step() - - # perform logging - n_batches = len(self.dataloaders["train"]) - - epoch_losses = running_losses / n_batches - - if self.verbose: - for crit_idx, crit_dict in enumerate(self.criteria): - crit_name = crit_dict["function"].__class__.__name__ - # get a stored name if it exists - crit_name = crit_dict.get("name", crit_name) - logger.info(f"{crit_name}: {epoch_losses[crit_idx]}") - - if self.tb_writer is not None: - for crit_idx in range(len(self.criteria)): - crit_dict = self.criteria[crit_idx] - crit_name = crit_dict["function"].__class__.__name__ - crit_name = crit_dict.get("name", crit_name) - self.tb_writer.add_scalar( - "loss/" + crit_name, - float(epoch_losses[crit_idx]), - self.epoch, - ) - weight_fxn = crit_dict["weight"] - weight = weight_fxn(self.epoch) if callable(weight_fxn) else weight_fxn - self.tb_writer.add_scalar( - "weight/" + crit_name, - float(weight), - self.epoch, - ) - - return np.sum(epoch_losses) - - @torch.no_grad() - def val_epoch(self): - """Perform a pass through the validation data.""" - self.model.train(False) - i = 0 - running_losses = np.zeros(len(self.criteria)) - running_corrects = 0 - running_total = 0 - - if self.unsup_dataloader is not None: - iter_unsup_dl = iter(self.unsup_dataloader) - - btrans = self.batch_transformers.get("val", None) - for data in self.dataloaders["val"]: - - # if a batch transformer is present, - # transform the data before use - if btrans is not None: - data = btrans(data) - - if self.use_gpu: - data["input"] = data["input"].cuda() - data["output"] = data["output"].cuda() - - if self.unsup_dataloader is not None: - unsup_data = next(iter_unsup_dl) - unsup_data["input"] = unsup_data["input"].to( - device=data["input"].device - ) - else: - unsup_data = None - - inputs = data["input"] - labels = data["output"] # one-hot - - # zero gradients - self.optimizer.zero_grad() - - # perform a forward pass to get prediction accuracies, regardless - # of what other tasks our model is performing - outputs = self.model(inputs) - _, predictions = torch.max(outputs, 1) - - # remake an integer version of the labels for quick checking - int_labels = torch.argmax(labels, 1) - correct = torch.sum(predictions.detach() == int_labels.detach()).item() - - running_corrects += float(correct) - running_total += int(int_labels.size(0)) - - logger.debug(f"PRED\n{predictions[:10, ...]}") - logger.debug(f"LABEL\n{int_labels[:10, ...]}") - logger.debug(f"CORRECT: {correct}") - - # compute losses - losses = [] - for crit_idx, crit_dict in enumerate(self.criteria): - - if not crit_dict.get("validation", False): - continue - - crit_fxn = crit_dict["function"] - weight_fxn = crit_dict["weight"] - # get the current weight from the weight function, - # or use the constant weight value - weight = weight_fxn(self.epoch) if callable(weight_fxn) else weight_fxn - - crit_fxn.train(False) - crit_loss = crit_fxn( - labeled_sample=data, - unlabeled_sample=unsup_data, - model=self.model, - weight=weight, - ) - - crit_name = crit_fxn.__class__.__name__ - - if hasattr(crit_fxn, "no_weight"): - # don't reweight the loss, already 
performed - # internally in the criterion - weight = 1.0 - # weight losses and accumulate - weighted_crit_loss = crit_loss * weight - losses.append(weighted_crit_loss) - running_losses[crit_idx] += weighted_crit_loss.item() - - logger.debug(f"{crit_name}: {crit_loss}") - logger.debug(f"\tweight : {weight}") - logger.debug(f"weighted {crit_name}: {weighted_crit_loss}") - - epoch_losses = running_losses / len(self.dataloaders["val"]) - epoch_acc = running_corrects / running_total - - epoch_loss = np.sum(epoch_losses) - - # append to log - with open(self.log_path, "a") as f: - f.write( - str(self.epoch) - + "," - + str(i) - + "," - + str(epoch_loss / (i + 1)) - + ",val_epoch\n" - ) - - # add one epoch to the waiting time for best loss - # if we had a new best loss, the counter is reset below - self.waiting_time += 1 - if (epoch_loss < self.best_loss) and (self.epoch >= self.min_epochs): - self.best_loss = epoch_loss - self.waiting_time = 0 - torch.save( - self.model.state_dict(), - os.path.join(self.out_path, f"model_weights_{self.epoch:03d}.pkl"), - ) - logger.info(f"Saving best model weights, epoch {self.epoch}...") - torch.save( - self.model.state_dict(), - os.path.join(self.out_path, "00_best_model_weights.pkl"), - ) - self.best_weights = copy.deepcopy(self.model.state_dict()) - logger.info("Saved best weights.") - - # also save the best weights of additional model components - for crit_fxn in self.criteria: - if crit_fxn["function"].__class__.__name__ == "DANLoss": - # save DAN weights - logger.info("Saving DAN weights...") - weights = crit_fxn["function"].dann.state_dict() - torch.save( - weights, - os.path.join( - self.out_path, - f"02_best_dan_weights.pkl", - ), - ) - elif crit_fxn["function"].__class__.__name__ == "ReconstructionLoss": - # save AE weights - logger.info("Saving Reconstruction weights...") - weights = crit_fxn["function"].rec_model.state_dict() - torch.save( - weights, - os.path.join( - self.out_path, - f"03_best_reconstruction_weights.pkl", - ), - ) - else: - pass - - with open(self.log_path, "a") as f: - f.write( - str(self.epoch) - + "," - + str(i) - + "," - + str(epoch_loss) - + ",best_model_weights\n", - ) - - if self.tb_writer is not None: - self.tb_writer.add_text( - "BestWeights", - f"Saved best weights at {self.epoch}, loss {epoch_loss}", - self.epoch, - ) - self.tb_writer.flush() - - elif self.epoch % self.save_freq == 0: - torch.save( - self.model.state_dict(), - os.path.join( - self.out_path, - "model_weights_" + str(self.epoch).zfill(3) + ".pkl", - ), - ) - - elif self.epoch == (self.n_epochs - 1): - torch.save( - self.model.state_dict(), - os.path.join(self.out_path, "01_final_model_weights.pkl"), - ) - if self.verbose: - logger.info(f"{self.waiting_time} epochs since last best weights.\n") - - if self.tb_writer is not None: - self.tb_writer.add_scalar("Loss/val", epoch_loss, self.epoch) - self.tb_writer.add_scalar("Acc/val", epoch_acc, self.epoch) - self.tb_writer.flush() - - if self.verbose: - logger.info("{} Loss : {:.4f}".format("val", epoch_loss)) - logger.info("{} Acc : {:.4f}".format("val", epoch_acc)) - logger.info( - "VAL EPOCH corrects: %f | total: %f" % (running_corrects, running_total) - ) - - return epoch_loss - - -"""Loss weight scheduling""" - - -class ICLWeight(object): - def __init__( - self, - ramp_epochs: int, - burn_in_epochs: int = 0, - max_unsup_weight: float = 10.0, - sigmoid: bool = False, - ) -> None: - """Schedules the interpolation consistency loss - weights across a set of epochs. 
- - Parameters - ---------- - ramp_epochs : int - number of epochs to increase the unsupervised - loss weight until reaching a maximum value. - burn_in_epochs : int - epochs to wait before increasing the unsupervised loss. - max_unsup_weight : float - maximum weight for the unsupervised loss component. - sigmoid : bool - scale weight using a sigmoid function. - - Returns - ------- - None. - """ - self.ramp_epochs = ramp_epochs - self.burn_in_epochs = burn_in_epochs - self.max_unsup_weight = max_unsup_weight - self.sigmoid = sigmoid - # don't allow division by zero, set step size manually - if self.ramp_epochs == 0.0: - self.step_size = self.max_unsup_weight - else: - self.step_size = self.max_unsup_weight / self.ramp_epochs - print( - "Scaling ICL over %d epochs, %d epochs for burn in." - % (self.ramp_epochs, self.burn_in_epochs) - ) - return - - def _get_weight( - self, - epoch: int, - ) -> float: - """Compute the current weight""" - if epoch >= (self.ramp_epochs + self.burn_in_epochs): - weight = self.max_unsup_weight - elif self.sigmoid: - x = (epoch - self.burn_in_epochs) / self.ramp_epochs - coef = np.exp(-5 * (x - 1) ** 2) - weight = coef * self.max_unsup_weight - else: - weight = self.step_size * (epoch - self.burn_in_epochs) - - return weight - - def __call__( - self, - epoch: int, - ) -> float: - """Compute the weight for an unsupervised IC loss - given the epoch. - - Parameters - ---------- - epoch : int - current training epoch. - - Returns - ------- - weight : float - weight for the unsupervised component of IC loss. - """ - if type(epoch) != int: - raise TypeError(f"epoch must be int, you passed a {type(epoch)}") - if epoch < self.burn_in_epochs: - weight = 0.0 - else: - weight = self._get_weight(epoch) - return weight diff --git a/build/lib/scnym/utils.py b/build/lib/scnym/utils.py deleted file mode 100644 index 1e4cab1..0000000 --- a/build/lib/scnym/utils.py +++ /dev/null @@ -1,743 +0,0 @@ -""" -Utility functions -""" -import torch -import numpy as np -import anndata -from scipy import sparse -import pandas as pd -import tqdm -from scipy import stats -import scanpy as sc -from sklearn.neighbors import NearestNeighbors, KNeighborsRegressor -from sklearn.metrics.pairwise import euclidean_distances -from typing import Union, Callable - - -def make_one_hot( - labels: torch.LongTensor, - C=2, -) -> torch.FloatTensor: - """ - Converts an integer label torch.autograd.Variable to a one-hot Variable. - - Parameters - ---------- - labels : torch.LongTensor or torch.cuda.LongTensor - [N, 1], where N is batch size. - Each value is an integer representing correct classification. - C : int - number of classes in labels. - - Returns - ------- - target : torch.FloatTensor or torch.cuda.FloatTensor - [N, C,], where C is class number. One-hot encoded. - """ - if labels.ndimension() < 2: - labels = labels.unsqueeze(1) - one_hot = torch.zeros( - [ - labels.size(0), - C, - ], - dtype=torch.float32, - device=labels.device, - ) - target = one_hot.scatter_(1, labels, 1) - - return target - - -def l1_layer0( - model: torch.nn.Module, -) -> torch.FloatTensor: - """Compute l1 norm for the first input layer of - a `CellTypeCLF` model. - - Parameters - ---------- - model : torch.nn.Module - CellTypeCLF model with `.classif` module. - - Returns - ------- - l1_reg : torch.FloatTensor - [1,] l1 norm for the first layer parameters. 
- """ - # get the parameters of the first classification layer - layer0 = list(model.classif.modules())[1] - params = layer0.parameters() - l1_reg = None - - # compute the l1_norm - for W in params: - if l1_reg is None: - l1_reg = W.norm(1) - else: - l1_reg = l1_reg + W.norm(1) - return l1_reg - - -def append_categorical_to_data( - X: Union[np.ndarray, sparse.csr.csr_matrix], - categorical: np.ndarray, -) -> (Union[np.ndarray, sparse.csr.csr_matrix], np.ndarray): - """Convert `categorical` to a one-hot vector and append - this vector to each sample in `X`. - - Parameters - ---------- - X : np.ndarray, sparse.csr.csr_matrix - [Cells, Features] - categorical : np.ndarray - [Cells,] - - Returns - ------- - Xa : np.ndarray - [Cells, Features + N_Categories] - categories : np.ndarray - [N_Categories,] str category descriptors. - """ - # `pd.Categorical(xyz).codes` are int values for each unique - # level in the vector `xyz` - labels = pd.Categorical(categorical) - idx = np.array(labels.codes) - idx = torch.from_numpy(idx.astype("int32")).long() - categories = np.array(labels.categories) - - one_hot_mat = make_one_hot( - idx, - C=len(categories), - ) - one_hot_mat = one_hot_mat.numpy() - assert X.shape[0] == one_hot_mat.shape[0], "dims unequal at %d, %d" % ( - X.shape[0], - one_hot_mat.shape[0], - ) - # append one hot vector to the [Cells, Features] matrix - if sparse.issparse(X): - X = sparse.hstack([X, one_hot_mat]) - else: - X = np.concatenate([X, one_hot_mat], axis=1) - return X, categories - - -def get_adata_asarray( - adata: anndata.AnnData, -) -> Union[np.ndarray, sparse.csr.csr_matrix]: - """Get the gene expression matrix `.X` of an - AnnData object as an array rather than a view. - - Parameters - ---------- - adata : anndata.AnnData - [Cells, Genes] AnnData experiment. - - Returns - ------- - X : np.ndarray, sparse.csr.csr_matrix - [Cells, Genes] `.X` attribute as an array - in memory. - - Notes - ----- - Returned `X` will match the type of `adata.X` view. - """ - if sparse.issparse(adata.X): - X = sparse.csr.csr_matrix(adata.X) - else: - X = np.array(adata.X) - return X - - -def build_classification_matrix( - X: Union[np.ndarray, sparse.csr.csr_matrix], - model_genes: np.ndarray, - sample_genes: np.ndarray, - gene_batch_size: int = 512, -) -> Union[np.ndarray, sparse.csr.csr_matrix]: - """ - Build a matrix for classification using only genes that overlap - between the current sample and the pre-trained model. - - Parameters - ---------- - X : np.ndarray, sparse.csr_matrix - [Cells, Genes] count matrix. - model_genes : np.ndarray - gene identifiers in the order expected by the model. - sample_genes : np.ndarray - gene identifiers for the current sample. - gene_batch_size : int - number of genes to copy between arrays per batch. - controls a speed vs. memory trade-off. - - Returns - ------- - N : np.ndarray, sparse.csr_matrix - [Cells, len(model_genes)] count matrix. - Values where a model gene was not present in the sample are left - as zeros. `type(N)` will match `type(X)`. 
- """ - # check types - if type(X) not in (np.ndarray, sparse.csr.csr_matrix): - msg = f"X is type {type(X)}, must `np.ndarray` or `sparse.csr_matrix`" - raise TypeError(msg) - n_cells = X.shape[0] - # check if gene names already match exactly - if len(model_genes) == len(sample_genes): - if np.all(model_genes == sample_genes): - print("Gene names match exactly, returning input.") - return X - - # instantiate a new [Cells, model_genes] matrix where columns - # retain the order used during training - if type(X) == np.ndarray: - N = np.zeros((n_cells, len(model_genes))) - else: - # use sparse matrices if the input is sparse - N = sparse.lil_matrix( - ( - n_cells, - len(model_genes), - ) - ) - - # map gene indices from the model to the sample genes - model_genes_indices = [] - sample_genes_indices = [] - common_genes = 0 - for i, g in tqdm.tqdm(enumerate(sample_genes), desc="mapping genes"): - if np.sum(g == model_genes) > 0: - model_genes_indices.append(int(np.where(g == model_genes)[0])) - sample_genes_indices.append( - i, - ) - common_genes += 1 - - # copy the data in batches to the new array to avoid memory overflows - gene_idx = 0 - n_batches = int(np.ceil(N.shape[1] / gene_batch_size)) - for b in tqdm.tqdm(range(n_batches), desc="copying gene batches"): - model_batch_idx = model_genes_indices[gene_idx : gene_idx + gene_batch_size] - sample_batch_idx = sample_genes_indices[gene_idx : gene_idx + gene_batch_size] - N[:, model_batch_idx] = X[:, sample_batch_idx] - gene_idx += gene_batch_size - - if sparse.issparse(N): - # convert to `csr` from `csc` - N = sparse.csr_matrix(N) - print("Found %d common genes." % common_genes) - return N - - -def knn_smooth_pred_class( - X: np.ndarray, - pred_class: np.ndarray, - grouping: np.ndarray = None, - k: int = 15, -) -> np.ndarray: - """ - Smooths class predictions by taking the modal class from each cell's - nearest neighbors. - - Parameters - ---------- - X : np.ndarray - [N, Features] embedding space for calculation of nearest neighbors. - pred_class : np.ndarray - [N,] array of unique class labels. - groupings : np.ndarray - [N,] unique grouping labels for i.e. clusters. - if provided, only considers nearest neighbors *within the cluster*. - k : int - number of nearest neighbors to use for smoothing. - - Returns - ------- - smooth_pred_class : np.ndarray - [N,] unique class labels, smoothed by kNN. - - Examples - -------- - >>> smooth_pred_class = knn_smooth_pred_class( - ... X = X, - ... pred_class = raw_predicted_classes, - ... grouping = louvain_cluster_groups, - ... k = 15,) - - Notes - ----- - scNym classifiers do not incorporate neighborhood information. - By using a simple kNN smoothing heuristic, we can leverage neighborhood - information to improve classification performance, smoothing out cells - that have an outlier prediction relative to their local neighborhood. - """ - if grouping is None: - # do not use a grouping to restrict local neighborhood - # associations, create a universal pseudogroup `0`. 
- grouping = np.zeros(X.shape[0]) - - smooth_pred_class = np.zeros_like(pred_class) - for group in np.unique(grouping): - # identify only cells in the relevant group - group_idx = np.where(grouping == group)[0].astype("int") - X_group = X[grouping == group, :] - # if there are < k cells in the group, change `k` to the - # group size - if X_group.shape[0] < k: - k_use = X_group.shape[0] - else: - k_use = k - # compute a nearest neighbor graph and identify kNN - nns = NearestNeighbors( - n_neighbors=k_use, - ).fit(X_group) - dist, idx = nns.kneighbors(X_group) - - # for each cell in the group, assign a class as - # the majority class of the kNN - for i in range(X_group.shape[0]): - classes = pred_class[group_idx[idx[i, :]]] - uniq_classes, counts = np.unique(classes, return_counts=True) - maj_class = uniq_classes[int(np.argmax(counts))] - smooth_pred_class[group_idx[i]] = maj_class - return smooth_pred_class - - -class RBFWeight(object): - def __init__( - self, - alpha: float = None, - ) -> None: - """Generate a set of weights based on distances to a point - with a radial basis function kernel. - - Parameters - ---------- - alpha : float - radial basis function parameter. inverse of sigma - for a standard Gaussian pdf. - - Returns - ------- - None. - """ - self.alpha = alpha - return - - def set_alpha( - self, - X: np.ndarray, - n_max: int = None, - dm: np.ndarray = None, - ) -> None: - """Set the alpha parameter of a Gaussian RBF kernel - as the median distance between points in an array of - observations. - - Parameters - ---------- - X : np.ndarray - [N, P] matrix of observations and features. - n_max : int - maximum number of observations to use for median - distance computation. - dm : np.ndarray, optional - [N, N] distance matrix for setting the RBF kernel parameter. - speeds computation if pre-computed. - - Returns - ------- - None. Sets `self.alpha`. - - References - ---------- - A Kernel Two-Sample Test - Arthur Gretton, Karsten M. Borgwardt, Malte J. Rasch, - Bernhard Schölkopf, Alexander Smola. - JMLR, 13(Mar):723−773, 2012. - http://jmlr.csail.mit.edu/papers/v13/gretton12a.html - """ - if n_max is None: - n_max = X.shape[0] - - if dm is None: - # compute a distance matrix from observations - if X.shape[0] > n_max: - ridx = np.random.choice( - X.shape[0], - size=n_max, - replace=False, - ) - X_p = X[ridx, :] - else: - X_p = X - - dm = euclidean_distances( - X_p, - ) - - upper = dm[np.triu_indices_from(dm, k=1)] - - # overwrite_input = True saves memory by overwriting - # the upper indices in the distance matrix array during - # median computation - sigma = np.median( - upper, - overwrite_input=True, - ) - self.alpha = 1.0 / (2 * (sigma ** 2)) - return - - def __call__( - self, - distances: np.ndarray, - ) -> np.ndarray: - """Generate a set of weights based on distances to a point - with a radial basis function kernel. - - Parameters - ---------- - distances : np.ndarray - [N,] distances used to generate weights. - - Returns - ------- - weights : np.ndarray - [N,] weights from the radial basis function kernel. - - Notes - ----- - We weight distances with a Gaussian RBF. - - .. math:: - - f(r) = \exp -(\alpha r)^2 - - """ - # check that alpha parameter is set - if self.alpha is None: - msg = "must set `alpha` attribute before computing weights.\n" - msg += "use `.set_alpha() method to estimate from data." 
- raise ValueError(msg) - - # generate weights with an RBF kernel - weights = np.exp(-((self.alpha * distances) ** 2)) - return weights - - -def knn_smooth_pred_class_prob( - X: np.ndarray, - pred_probs: np.ndarray, - names: np.ndarray, - grouping: np.ndarray = None, - k: Union[Callable, int] = 15, - dm: np.ndarray = None, - **kwargs, -) -> np.ndarray: - """ - Smooths class predictions by taking the modal class from each cell's - nearest neighbors. - - Parameters - ---------- - X : np.ndarray - [N, Features] embedding space for calculation of nearest neighbors. - pred_probs : np.ndarray - [N, C] array of class prediction probabilities. - names : np.ndarray, - [C,] names of predicted classes in `pred_probs`. - groupings : np.ndarray - [N,] unique grouping labels for i.e. clusters. - if provided, only considers nearest neighbors *within the cluster*. - k : int - number of nearest neighbors to use for smoothing. - dm : np.ndarray, optional - [N, N] distance matrix for setting the RBF kernel parameter. - speeds computation if pre-computed. - - Returns - ------- - smooth_pred_class : np.ndarray - [N,] unique class labels, smoothed by kNN. - - Examples - -------- - >>> smooth_pred_class = knn_smooth_pred_class_prob( - ... X = X, - ... pred_probs = predicted_class_probs, - ... grouping = louvain_cluster_groups, - ... k = 15,) - - Notes - ----- - scNym classifiers do not incorporate neighborhood information. - By using a simple kNN smoothing heuristic, we can leverage neighborhood - information to improve classification performance, smoothing out cells - that have an outlier prediction relative to their local neighborhood. - """ - if grouping is None: - # do not use a grouping to restrict local neighborhood - # associations, create a universal pseudogroup `0`. - grouping = np.zeros(X.shape[0]) - - smooth_pred_probs = np.zeros_like(pred_probs) - smooth_pred_class = np.zeros(pred_probs.shape[0], dtype="object") - for group in np.unique(grouping): - # identify only cells in the relevant group - group_idx = np.where(grouping == group)[0].astype("int") - X_group = X[grouping == group, :] - y_group = pred_probs[grouping == group, :] - # if k is a Callable, use it to define k for this group - if callable(k): - k_use = k(X_group.shape[0]) - else: - k_use = k - - # if there are < k cells in the group, change `k` to the - # group size - if X_group.shape[0] < k_use: - k_use = X_group.shape[0] - - # set up weights using a radial basis function kernel - rbf = RBFWeight() - rbf.set_alpha( - X=X_group, - n_max=None, - dm=dm, - ) - - if "dm" in kwargs: - del kwargs["dm"] - # fit a nearest neighbor regressor - nns = KNeighborsRegressor( - n_neighbors=k_use, - weights=rbf, - **kwargs, - ).fit(X_group, y_group) - smoothed_probs = nns.predict(X_group) - - smooth_pred_probs[group_idx, :] = smoothed_probs - g_classes = names[np.argmax(smoothed_probs, axis=1)] - smooth_pred_class[group_idx] = g_classes - - return smooth_pred_class - - -def argmax_pred_class( - grouping: np.ndarray, - prediction: np.ndarray, -): - """Assign class to elements in groups based on the - most common predicted class for that group. - - Parameters - ---------- - grouping : np.ndarray - [N,] partition values defining groups to be classified. - prediction : np.ndarray - [N,] predicted values for each element in `grouping`. - - Returns - ------- - assigned_classes : np.ndarray - [N,] class labels based on the most common class assigned - to elements in the group partition. 
- - Examples - -------- - >>> grouping = np.array([0,0,0,1,1,1,2,2,2,2]) - >>> prediction = np.array(['A','A','A','B','A','B','C','A','B','C']) - >>> argmax_pred_class(grouping, prediction) - np.ndarray(['A','A','A','B','B','B','C','C','C','C',]) - - Notes - ----- - scNym classifiers do not incorporate neighborhood information. - This simple heuristic leverages cluster information obtained by - an orthogonal method and assigns all cells in a given cluster - the majority class label within that cluster. - """ - assert ( - grouping.shape[0] == prediction.shape[0] - ), "`grouping` and `prediction` must be the same length" - groups = sorted(list(set(grouping.tolist()))) - - assigned_classes = np.zeros(grouping.shape[0], dtype="object") - - for i, group in enumerate(groups): - classes, counts = np.unique(prediction[grouping == group], return_counts=True) - majority_class = classes[np.argmax(counts)] - assigned_classes[grouping == group] = majority_class - return assigned_classes - - -def compute_entropy_of_mixing( - X: np.ndarray, - y: np.ndarray, - n_neighbors: int, - n_iters: int = None, - **kwargs, -) -> np.ndarray: - """Compute the entropy of mixing among groups given - a distance matrix. - - Parameters - ---------- - X : np.ndarray - [N, P] feature matrix. - y : np.ndarray - [N,] group labels. - n_neighbors : int - number of nearest neighbors to draw for each iteration - of the entropy computation. - n_iters : int - number of iterations to perform. - if `n_iters is None`, uses every point. - - Returns - ------- - entropy_of_mixing : np.ndarray - [n_iters,] entropy values for each iteration. - - Notes - ----- - The entropy of batch mixing is computed by sampling `n_per_sample` - cells from a local neighborhood in the nearest neighbor graph - and contructing a probability vector based on their group membership. - The entropy of this probability vector is computed as a metric of - intermixing between groups. - - If groups are more mixed, the probability vector will have higher - entropy, and vice-versa. 
- """ - # build nearest neighbor graph - n_neighbors = min(n_neighbors, X.shape[0]) - nn = NearestNeighbors( - n_neighbors=n_neighbors, - metric="euclidean", - **kwargs, - ) - nn.fit(X) - nn_idx = nn.kneighbors(return_distance=False) - - # define query points - if n_iters is not None: - # don't duplicate points when sampling - n_iters = min(n_iters, X.shape[0]) - - if (n_iters is None) or (n_iters == X.shape[0]): - # sample all points - query_points = np.arange(X.shape[0]) - else: - # subset random query points for entropy - # computation - assert n_iters < X.shape[0] - query_points = np.random.choice( - X.shape[0], - size=n_iters, - replace=False, - ) - - entropy_of_mixing = np.zeros(len(query_points)) - for i, ridx in enumerate(query_points): - # get the nearest neighbors of a point - nn_y = y[nn_idx[ridx, :]] - - nn_y_p = np.zeros(len(np.unique(y))) - for j, v in enumerate(np.unique(y)): - nn_y_p[j] = sum(nn_y == v) - nn_y_p = nn_y_p / nn_y_p.sum() - - # use base 2 to return values in bits rather - # than the default nats - H = stats.entropy(nn_y_p) - entropy_of_mixing[i] = H - return entropy_of_mixing - - -"""Find new cell state based on scNym confidence scores""" - -from sklearn.metrics import calinski_harabasz_score - - -def _optimize_clustering(adata, resolution: list = [0.1, 0.2, 0.3, 0.5, 1.0]): - scores = [] - for r in resolution: - sc.tl.leiden(adata, resolution=r) - s = calinski_harabasz_score(adata.obsm["X_scnym"], adata.obs["leiden"]) - scores.append(s) - cl_opt_df = pd.DataFrame({"resolution": resolution, "score": scores}) - best_idx = np.argmax(cl_opt_df["score"]) - res = cl_opt_df.iloc[best_idx, 0] - sc.tl.leiden(adata, resolution=res) - print("Best resolution: ", res) - return cl_opt_df - - -def find_low_confidence_cells( - adata: anndata.AnnData, - confidence_threshold: float = 0.5, - confidence_key: str = "Confidence", - use_rep: str = "X_scnym", - n_neighbors: int = 15, -) -> pd.DataFrame: - """Find cells with low confidence predictions and suggest a potential - number of cell states within the low confidence cell population. - - Parameters - ---------- - adata : anndata.AnnData - [Cells, Genes] experiment containing an scNym embedding and scNym - confidence scores. - confidence_threshold : float - threshold for low confidence cells. - confidence_key : str - key in `adata.obs` containing confidence scores. - use_rep : str - tensor in `adata.obsm` containing the scNym embedding. - n_neighbors : int - number of nearest neighbors to use for NN graph construction - prior to community detection. - - Returns - ------- - None. - Adds `adata.uns["scNym_low_confidence_cells"]`, a `dict` containing - keys `"cluster_optimization", "n_clusters", "embedding"`. - Adds key to `adata.obs["scNym_low_confidence_cluster"]`. 
- - Notes - ----- - """ - # identify low confidence cells - adata.obs["scNym Discovery"] = ( - adata.obs[confidence_key] < confidence_threshold - ).astype(bool) - low_conf_bidx = adata.obs["scNym Discovery"] - - # embed low confidence cells - lc_ad = adata[adata.obs["scNym Discovery"], :].copy() - sc.pp.neighbors(lc_ad, use_rep=use_rep, n_neighbors=n_neighbors) - sc.tl.umap(lc_ad, min_dist=0.3) - - cl_opt_df = _optimize_clustering(lc_ad) - - lc_embed = lc_ad.obs.copy() - for k in range(1, 3): - lc_embed[f"UMAP{k}"] = lc_ad.obsm["X_umap"][:, k - 1] - - # set the outputs - adata.uns["scNym_low_confidence_cells"] = { - "cluster_optimization": cl_opt_df, - "n_clusters": len(np.unique(lc_ad.obs["leiden"])), - "embedding": lc_embed, - } - adata.obs["scNym_low_confidence_cluster"] = "High Confidence" - adata.obs.loc[low_conf_bidx, "scNym_low_confidence_cluster",] = lc_ad.obs[ - "leiden" - ].apply(lambda x: f"Low Confidence {x}") - return diff --git a/scnym.egg-info/PKG-INFO b/scnym.egg-info/PKG-INFO deleted file mode 100644 index 7fb0b57..0000000 --- a/scnym.egg-info/PKG-INFO +++ /dev/null @@ -1,46 +0,0 @@ -Metadata-Version: 2.1 -Name: scnym -Version: 0.3.3 -Summary: Semi supervised adversarial network networks for single cell classification -Home-page: http://github.com/calico/scnym -Author: Jacob C. Kimmel, David R. Kelley -Author-email: jacobkimmel+scnym@gmail.com, drk@calicolabs.com -License: Apache -Classifier: Environment :: Console -Classifier: Intended Audience :: Science/Research -Classifier: Topic :: Scientific/Engineering :: Bio-Informatics -Requires-Python: >=3.6 -License-File: LICENSE -Requires-Dist: anndata==0.8.0 -Requires-Dist: ConfigArgParse==1.1 -Requires-Dist: h5py==3.10.0 -Requires-Dist: leidenalg==0.8.0 -Requires-Dist: louvain==0.7.0 -Requires-Dist: numba==0.49.1 -Requires-Dist: numpy==1.21.0 -Requires-Dist: numpy-groupies==0.9.13 -Requires-Dist: pandas==1.5.3 -Requires-Dist: pytest==5.4.1 -Requires-Dist: python-dateutil==2.8.2 -Requires-Dist: PyYAML==5.3.1 -Requires-Dist: requests==2.26.0 -Requires-Dist: requests-cache==0.5.2 -Requires-Dist: requests-oauthlib==1.3.0 -Requires-Dist: requests-toolbelt==0.9.1 -Requires-Dist: matplotlib==3.6.3 -Requires-Dist: scanpy==1.6.0 -Requires-Dist: scikit-learn==0.22.2.post1 -Requires-Dist: scikit-misc==0.1.3 -Requires-Dist: scipy==1.4.1 -Requires-Dist: six==1.14.0 -Requires-Dist: tensorboard==2.2.1 -Requires-Dist: tensorboard-plugin-wit==1.6.0.post2 -Requires-Dist: tensorboardX==2.1 -Requires-Dist: torch==1.4.0 -Requires-Dist: torchvision==0.5.0 -Requires-Dist: tqdm==4.44.1 -Requires-Dist: umap-learn==0.3.10 -Requires-Dist: urllib3==1.26.6 -Requires-Dist: protobuf==3.20.* - -scNym uses the semi-supervised MixMatch framework and domain adversarial training to take advantage of information in both the labeled and unlabeled datasets. 
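For context, a minimal usage sketch of the `find_low_confidence_cells` helper removed in the build/lib/scnym/utils.py hunk above. This is an illustration only, not part of the patch: it assumes the same function remains importable from the installed scnym package, that the AnnData object already carries an scNym embedding in `.obsm["X_scnym"]` and per-cell confidence scores in `.obs["Confidence"]`, and the input file name is hypothetical.

import anndata
from scnym.utils import find_low_confidence_cells  # assumes the scnym package is installed

adata = anndata.read_h5ad("labeled_with_scnym.h5ad")  # hypothetical input file

# re-cluster cells whose prediction confidence falls below the threshold
find_low_confidence_cells(
    adata,
    confidence_threshold=0.5,      # cells below this score are treated as low confidence
    confidence_key="Confidence",   # column in adata.obs holding confidence scores
    use_rep="X_scnym",             # embedding in adata.obsm used for neighbors and UMAP
    n_neighbors=15,
)

# results are written back onto the AnnData object:
#   adata.uns["scNym_low_confidence_cells"]: clustering diagnostics and a UMAP embedding
#   adata.obs["scNym_low_confidence_cluster"]: "High Confidence" or "Low Confidence <leiden cluster>"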
diff --git a/scnym.egg-info/SOURCES.txt b/scnym.egg-info/SOURCES.txt deleted file mode 100644 index 1e4a6c6..0000000 --- a/scnym.egg-info/SOURCES.txt +++ /dev/null @@ -1,60 +0,0 @@ -LICENSE -README.md -VERSION -demo_script.sh -requirements.txt -setup.py -.github/workflows/python-package.yml -assets/processed_data.md -assets/scnym_icon.png -assets/scnym_mixmatch_diagram.png -assets/scnym_mmdan_diagram.png -baseline/README.md -baseline/baseline.R -baseline/baseline.py -configs/default_config.txt -notebooks/scnym_classif_tutorial.ipynb -scnym/__init__.py -scnym/__main__.py -scnym/api.py -scnym/attributionpriors.py -scnym/dataprep.py -scnym/distributions.py -scnym/interpret.py -scnym/losses.py -scnym/main.py -scnym/model.py -scnym/predict.py -scnym/scnym_ad.py -scnym/trainer.py -scnym/utils.py -scnym.egg-info/PKG-INFO -scnym.egg-info/SOURCES.txt -scnym.egg-info/dependency_links.txt -scnym.egg-info/entry_points.txt -scnym.egg-info/requires.txt -scnym.egg-info/top_level.txt -scnym/__pycache__/__init__.cpython-38.pyc -scnym/__pycache__/api.cpython-38.pyc -scnym/__pycache__/attributionpriors.cpython-38.pyc -scnym/__pycache__/dataprep.cpython-38.pyc -scnym/__pycache__/distributions.cpython-38.pyc -scnym/__pycache__/interpret.cpython-38.pyc -scnym/__pycache__/losses.cpython-38.pyc -scnym/__pycache__/main.cpython-38.pyc -scnym/__pycache__/model.cpython-38.pyc -scnym/__pycache__/predict.cpython-38.pyc -scnym/__pycache__/trainer.cpython-38.pyc -scnym/__pycache__/utils.cpython-38.pyc -tests/test_api.py -tests/test_da.py -tests/test_dataprep.py -tests/test_guide.py -tests/test_interpret.py -tests/test_main.py -tests/test_mixmatch.py -tests/test_model.py -tests/test_multitask.py -tests/test_reconstruction.py -tests/test_trainer.py -tests/test_utils.py \ No newline at end of file diff --git a/scnym.egg-info/dependency_links.txt b/scnym.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- a/scnym.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/scnym.egg-info/entry_points.txt b/scnym.egg-info/entry_points.txt deleted file mode 100644 index 5d7f4c0..0000000 --- a/scnym.egg-info/entry_points.txt +++ /dev/null @@ -1,3 +0,0 @@ -[console_scripts] -scnym = scnym.main:main -scnym_ad = scnym.scnym_ad:main diff --git a/scnym.egg-info/requires.txt b/scnym.egg-info/requires.txt deleted file mode 100644 index e9ae8ff..0000000 --- a/scnym.egg-info/requires.txt +++ /dev/null @@ -1,31 +0,0 @@ -anndata==0.8.0 -ConfigArgParse==1.1 -h5py==3.10.0 -leidenalg==0.8.0 -louvain==0.7.0 -numba==0.49.1 -numpy==1.21.0 -numpy-groupies==0.9.13 -pandas==1.5.3 -pytest==5.4.1 -python-dateutil==2.8.2 -PyYAML==5.3.1 -requests==2.26.0 -requests-cache==0.5.2 -requests-oauthlib==1.3.0 -requests-toolbelt==0.9.1 -matplotlib==3.6.3 -scanpy==1.6.0 -scikit-learn==0.22.2.post1 -scikit-misc==0.1.3 -scipy==1.4.1 -six==1.14.0 -tensorboard==2.2.1 -tensorboard-plugin-wit==1.6.0.post2 -tensorboardX==2.1 -torch==1.4.0 -torchvision==0.5.0 -tqdm==4.44.1 -umap-learn==0.3.10 -urllib3==1.26.6 -protobuf==3.20.* diff --git a/scnym.egg-info/top_level.txt b/scnym.egg-info/top_level.txt deleted file mode 100644 index 0431c2e..0000000 --- a/scnym.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -scnym diff --git a/scnym/__pycache__/__init__.cpython-38.pyc b/scnym/__pycache__/__init__.cpython-38.pyc deleted file mode 100644 index 5d3d7e734cb67147c2f1848843ea563d1038bf88..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 452 
(binary patch data omitted)

diff --git a/scnym/__pycache__/api.cpython-38.pyc b/scnym/__pycache__/api.cpython-38.pyc
deleted file mode 100644
index 0b2a6b34d89e6b0e21ebf6bd454295bd8b58c3dd..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 34712
(binary patch data omitted)

diff --git a/scnym/__pycache__/dataprep.cpython-38.pyc b/scnym/__pycache__/dataprep.cpython-38.pyc
deleted file mode 100644
index dacf78bb3cb6d3f62fe7bf50b8de6c06f4e5a04e..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 17744
(binary patch data omitted)

diff --git a/scnym/__pycache__/distributions.cpython-38.pyc b/scnym/__pycache__/distributions.cpython-38.pyc
deleted file mode 100644
index ad0c70d06ecc56de8815011ac2cca7ff1c421b75..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 12479
(binary patch data omitted)

diff --git a/scnym/__pycache__/interpret.cpython-38.pyc b/scnym/__pycache__/interpret.cpython-38.pyc
deleted file mode 100644
index 31c90e12a187eba97a036518ecd8216f7784cd9b..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 35948
(binary patch data omitted)
z&`XG8QNL1B+a7`(dj`Sn)1NukHg7lYT-=3N_q6@Qv&X`li@S@-fSCDdm02<-naumm zQ;X*Ft5(;Ga@>x}lA;$kz;z^-SLsPudEAp>0rNAULc1vmQ#vUf-$h&Tt9fV#B)-bOESrBLM`^HbAYF&nuN%78b5apNjOKsS(A}a zJa$Vs>({j1te{ZM&+-}0jehQO252acx!(5Kc4Ko(&1|v=Ac{P9>I4|?wHIWGC~c0}h}Qt2yklRC*9}Ez%Cs&pD>MV{ z_KIsmy6r%fAWDNF$=lH|#&!JUfI>05+3W~EuviSr>gBOF&i%*`zh(-Pqi)i(|Smt1^zGpXo7)9I}B$-<( zrOHemf$CW|Uq<5V z(9~tl7o3B>{xh%vyr~1wHQx~ZpiWL(o`c`E^o2~{@F1~c3FN?2HT-ir{PYF>jM+(Aa`daW>R8lj^uiPrak0 zUo}sm){rXbP`?~6x(h&OP?Rl;ikB;p$5S_?JG>O@4IgQD8{RXbUWhAZJFJM+0`3--B^Z?eNc1Xt3nt7C z9WZ#**bcl7!NSi7+NIe@6b788#TljJo5kvLNRon_uvQ?iuD7#glYA=CBPjK3r<L7tzxiDEQ|!s1`$m2bBcW zn)S1nVSJ#z>~mSD4xlA#fo}Jqa%>eY<2lfV(YO=U$B_GUqjO%?C2Uzc2?hc3H)HLg zT7DFa(grF*#8XN}Iewg4zIEZOmaFLzDV7*?9M6EE)V#)x1Gt>H{gmu-Juqh>l5FA^ zfcjNT4}xu!%|Ud48%+pH(Q7*)ErY$_T^nk2uN!xZBWrgr;%>5D1-O^qeh&wrXc_N8 zKr$$B++D{Da-HqXtsSMvewL33>CPvSEGoJg{W;gAT#cArlITiJKP_Bo>ALo^N{l)D@JA6{8*P-35 z*A+jIe^NAnYQ+{p+l~wHJQsCOvnd0B_v$c zqW&#v#x#himU&0VBI2LUzdx(oU7R1ADL$3?8t?O-@#mF~8}CZ#Rg@k$NcJDV&;0`= zvF-RK8)|qv*UH-k8|J<>5cr|1vONNWd}2$k*rTm6yNq1pa)v9GvI=ixEm)UfC7wY3 zGHk{)&l|8IOMYwg4c!{E#%V>CJ0sQv?$9J})TXUNVXZS!oz~17IcwIMgY|mWI&9Ci z_MuJw&Dis;1#2H{+w;}}p6<6y+{Hci-`bB}9Iy`J{sD{p_mN=gahUai+&$ZRG^A2X zK~9z}?5{%xmtdS^pdJdeAjqNvRw#%fM05=BP=AWM9;_Q7CnJ^(iVMzhARC=}81Of` zm$yV!FZUF;#HU5k(v)}emhR{4r5EpqFp;U3X*Y#6WCgilYpqv0pT->ncYYpsMtR4B zsot9aKQ&3cC=LCe!)y3F+nO4BmbVJ=v+2%^RrC$;d8IeAy9IB?hu!7-At0jMtXOjA z#QM9h$(rW91++H{>+$~90kr;AUiA;kh-R$cWmlnc5QPIk-_gcf=622hR` zp)T4?IX=XghO$k{oZM#E|jJX(Q zA`>u=R68uAJT%;m?sgkWJQ|7GZSXy|mAng*gV45X9+jnZG|J}2?;Rc(pdlb}%HRuT zS8|$>B_gRV^l3m3ui!$XN1>1_{H^O*xJ-)2ka0aCIcPeLi(N|{nIMn!xS%}=d247# z6&>G#7YPO^&`;~LchR=#gr$;Mcm^{)*onj$2EA-|!KOC|0u7LNPfVm_SowmY^$4$~)s$^L?(jK} z*pqV){b_sWgn7~C>&Ku_BdpGl4b&K~(n-!ysK&drd?K!vCF9|RB2g;mZObPpp^5!E za?J>Rb&iQbbx|^4Y-U^~IF1!44gqU0wr<6(KB)vF=&qdZr6wv74=;>7d0{)y(BMgj zs+G-@G9|8>^Sc*z3kUW*o|$FXfE!_3@vKH8uQ!(|C}6@nai#G|qW9(6;t@aMg=5}l zK97#oKeKt?ox7iY(oCF-j$OJ9YGF|bZ|;8P$=zd@%sb3Ww=FK3CzD*4;Fk3KK#fU% z#bfCha@@v(_2#0g>`qo@CQ@hb7H~g6sgQ1|-G`8L_SuIYjMf3*;X)I@^N8bt8U`aN z2Z*<`xbrrBla_ju%R^Y{zTT|Niil5}TWxz2;t^2I;!VzDtf*+&_7)XSick;AJkCjE zasD#%mEmj^diXe>;@xfD9S6(ki97^*Ej()=frZz>Z1O18SH)=$iLH}R5T3~@_5pF@hDLe9(9_^jx3yoTgM^_6*UGWJ;f7uS%2Q^68lFI z89hrdhfvFG3kCZqyn*@2ei70fGF}9-)VG^S*1*N7C-lwk$_1cI|PN%ZWhWSF-19t zz^XtNgX#y2QXNX8P~BsAg%%hRN@%a?P%7y#Ng4gJ1ug4ktv`bMxy!ofSMz>elt=gv z7AV&mqatV*&T9Q}E9d7g>!JwC_a{UZrL`ul0$dV)(#IK9MWQl-x(NSyX9minS@fsq z7yZH--P4KAa3HRYbocS3%JV#A^!z+@O@065?)*b4fHwdnu6~ zqOaHq^O4YT>IL8=x{O8cEJPMO()x=%D8G6fYQiYl#-mkqO_;S*&#y;WSv+R8Zm8Q4 z&50TY2co<`V=QiFpE;Q|n zFffBA2RhVJKggSTILyKrDEQ3zF5bzB?#HR&+N#(4yk} z5O2~;L#i4^6raI^?;)Zvagx~wWem^)UB1Q)C~J`Nj#A!?>AjL0IN;!eT+UhRHwmV8n&kd62!$2q(9+LV?Yot61G8VB9<0) znHD(DHemHth#vRE7l#u#G-_HG*pd*0U*hOREsl})4(&Cup z5j6M(AiWtPJVAqV@;}qsH-P>=dJXg^^e*%tOWsS`ar0Y`-Auc>UX5J*=oTG=1@VIo z*3YuY*{ycrliKWvMH=8mw*i6~u{Z%ILIENoCaeD2D3h}&$8(Cxyl<50`~X)SR+e%- za$d#R(&D^gV+11&F>qwnsJRZEBk5e?{52N;UM4h;IX}YWH6}!z&gYr@6(+AUA?9>` zlu4Nhc|nEn{wmJ|bN)EbevFA3Cr5a;i{w{{29DwcNKg+o)A&Dx2H^pl*L%nILJ~0= zOmUG22^nw3+>}z@3)e(^7#-x8s8SNHX#*!j9n@IMf2eZiGt_3cbj3IMKn#}gT9$fk z*RpcZWFyQ*J)?>V3Y~&4$|jr@-^_iKx^BZN@xPZ-R%Hu2@a12JQ}wn53AqbZJmng3@0a-;=p!Us zxKIFt_R2}2Ue3JWR?T;G+`b3CIxbwP9Lz1z+9tg=lw6B31CddZRel@;FRBV}^0L#o zSXT{%TKT5C+GgNL7Bd+bP0!v8iZD@hP_r8tJPWn*&QAguYZFqqp7a<_0)&{otS#nqlE%|K%fe-2H5H=i?beiR%AXzqvhT25Eg^Y#f;gq|HoO=6%5 zv?poHQVIv9ndMT74mu1_)|6%SD_C$?1RCN}o>0pQI?RKxZA0_=l7?6o_{p^lv0&$% z)6mdnU(#T$F7%7AJj3D)0R(X`25BvHvYmTsHqTvbUS)Iqu3k`HTqeI99k9%$eIqIANZ zl`-B0j2A8L%Ussk3veBJe**W4iUX0Jj6hH-`y(?MpcM8hhaSCyIV&r%jLzpCMYR3w 
z8PBZ|ju2PsiIzNh30w%CSNwA7c{waK>Puu%SZXA^JI21mbrm`Cq&{n0- z?Tla6pV$4-Nnv!Q$K`SGWElF6Qg~TaKpC&_n}F& zaKxV!u5c1)dZIx$jkGsIw7jSd3`KNNhyv?k6}KZ9by#@&W-w-vym{gq6gU+grh%Mc zE(1n#83N;%Hk-Y$>%xQ}c@i=u1|4H_-cP7XJO?pO1P!8(A|!{jrf;D56=W@;V-Zrh zrXZV|h&cb=s#y{ybSSBxy>G6W%Tz-{bs3w0mrpInbSLiX^(5{D7KFdJ(LN`|5)g`X z2XWiiQi$JKH6M;tzX@@O72X5nRjDRuC0ELmp|^XwYCaghC#sTF*pA{lA`SXLg+o=v z9TwHWGPJw`3CWCqxN4@?PYf`T0}8VPz+zq}DS21bJO_>=Dgj%@D%>gDko>YAR`UXA zGhGA{h#YGE?yC85rO~|D1Y~X^6t1}i=0&=zCMQ&bpnmb@iEQynN0vmP+B@>BumGHm zQ+Gsc3u9a(AgqdtLO7M)spzH(hl33##t?ghs^W4kfNToO?))vB)kab;tyhq#I%D=m zc~Tb%3ozF!2*^a#%d$J*4-k7nU4YWul08sL4mZfH(QT?W z7Ii&TE4;$Y!uZefOt^o#sWAR^3py|*1&ETCF#t}Fi3kO;!X(IDq6;E?Yv2;FZX*my zoD<~2@sYnaN@gj3)y~hbeuD#Z-eh$@i)3+XSnKiyKH@|L`3~5mO)ydHyW)?2kq`ei z9~zvm8ayteS*xkAA!?SYQd)6ofwoy24Mot9ZzCTzB(ntjC~qW-IPP@;oQ~_DK<1WW zw)h^R_TJU=@V3$RLz>wSrmHB?e7G{8+<^x8AY2CW*%|FvwgTy93_gea<*6wSe-k*H z$QR!mV_mr!Z2~%D13nhU*JljWPu+2L3VuDkXa_A0~;{}9Z+U>S`rRkmSl9Zc1fuH+%3|pDrAt!;EI}eu0eFk^PD#;zNUPU-6 z7Gc7sqaw)p`Hb@rLZC}vR!TH|cc$R`3lAYU8R9t!JD4d-T`Tn{PQqR9>(`{ePESTeZGuA*qDk3 zQ$Ok-@pA~RE`i;8!l%sePh9>6ge(bdG`5 z;{8!o8fRndonNxX!OWGsiT;$zxt`grc++eFOwYeU=*Z~q7c-T5a^l;*&D*k2(LA#Ec?OKc?$!^lr zb79Y3SRNP^CwOmeatO!lF`_s}Xc^g)U#*%)VFf>`_&1ylwBG4ZmR>M=8ViaOxWfuhe-)I^xE(gi<9bYOOoK5gTUM2e;1Qa2 z;|f^Gm0-KafKwKHM6K%;zjR#g9*P~l~ zM_W!=)d4E8uXyj)*w~)FeT7ISY~&-bdXV!93ShO`@CFDz$pwXI zU9ssco>cCqSxi?@y3lYGsHli;xF`T4xbW7x(^6bGx#pk<#9Tjb@3;<`ryz?Wx{bXtEPG{pJt&0g!k}bBqL7X6}+z6UZcw=rDx#ypmyCTthM0;Xm^y3vP{EY9d*Oq zFEOEqw_^MS8gmG6hZ>TmP{(4j&Eyx51UcEYAkeq0pcLbdpv2J&5pupCx!`**2Eu>H zV!zCU@`Uq`nEVQoz#!^#Y@F6AsL+Yrs+CiGUZ{>+D-vOfzjBVR({=tCE1(`<1?6b# zf%a%n!mA70CbK^TGK!nu0~)@MV8qU5Zl405j=!%P2G7b!>D5t0KvWS?_f(|9lOt>5 zKlpTU0;Za^0&ahfPvKubfqWBqF8n;|q6f&BZe;bZLnVuP^{?gg6Z(ucug{5B$UNGi zGjZ|VeEu5T{T|(hn~2P%3{?ysgy85P)6S)F7C#BWW5^vev-2=*$bS!x`pOqouFrt2 zQt|%SFQlJk#8AZI(a=H5DBMeE+CY#VwQ`6Pid_5)z5zBK@kwCF@{Kz%on**}+P&^p z)J**Qj$bdNo7EbM#Lh3viIg^^H>(PF^n0S*4 z%u#~aptI_ZS=+Pnb`1FNXHY6zyk`X4S5uTdC@wI|X8|T=CNIkWAiE z@3?Fv6pl7c{!PY*rQhdr?|`CT;hRb(o}8#qQlU!l>mo819OV2a&%TT#FgEQ*M|pv* z@+KX|;Fc~ZDqc*rGHb0ftLv;IN3D_`&oEj!OtvGPp)QCu?wO8dsUj=qfE02>A`?&( z%;48MeHEH@jp`6nAxf%8KpX!iItRauCG|V#v^b{oOxjHDVbVbYZ?_#rjs>}AJ9M)b z4bC*5l0-PvWH?()jxrHNQC|7>1i5Fn;V0C1XgQX%2ycmOCgm@8w7f%sGPJFHMk+CAq-mQFx zr2CQc&*M-G&3LEs*{H0P)AX$IC%9+e?z_5`H6Y9+N+uS$RC+Mu0{QoBh%9g>(|XDgMXCfJV46;Iy?L^;8hT z!5tNIwHfO`s-{_+e+bC1rr#)9GuA9}%-drqF^7N-ip~c2a0KQ(7)*Q|A}oE!9|io< zD2(8n+)&JgQV0sUNE`AKDDWaBw(Xx9QUKybyaAP*UC3Ik(G#zRDxB^m*_ zn-ASvGSuNflTqG<2F1_Si!Yw>UDz$>WP{Z47a@?~!+Qo2D9>8aDSZ4nl8YB27Z=9GKO&427>qB15`epy-ZLJ6Y!!T4 zI^ev-6HKf@pTbSWU(hZTRK)GCu9{Pfmk6yR%oe-r3!4^_CB5l#)lF#OIy@g>ynfTu49Ez&gb6ECw z6rbK;RHRP^5Wp1IzS0YZ9gLmJz;1Ah60HS!3MPm{68*WyFS23AqxCa4@L(V7+6+HB(+$GWqi$mNoTwTpAPnx@sCy!p#voB zWxrG}b-1s@lqIGQ<@o|$q};e)2Q!g4y@~zR4V|+DCPd%9idZKA-1~QIeLouaN2%<4 zAr@@Zo83b&RGK^LE@WMV`Q&aQtc23z9{TxM{Vt``Qf5+m0Fd_T!gZDW(1VzSL^ubp zV3X|kA@aSSDTAxuu~5w%sYe%qAyO8S2*vR~-Y54hg63U`0Tf}C8=iX8vWqWG6rx@f z?csF?bo;$kC@wjExLb5^i}41MLPNCnzu5$89M^o~3!VPI@pHJMDI zn&K*uGjlq>#zbV^|H(6v?q+!QHj^(h5zPDRJQGAJVsW3BR1`TB2b`}k`F$pjF`>|m zeS8uX-v8iT33QNF4&$;`j-eWk1u_?E%z1&h(?e)F7>(T*5lZ8pC&GRJ)npEes!g`! 
z;SPROoFUB~qTnXyBG$d9ORR+cPM*S`R{VP2C_|Dq^xmN>k$U9c$cFGHh^K?RprX1O zgvdc0eROaV_&bZAsHlPFL@@@Wj2$jufrOtg5S-q-ojR^7WRXzIMSxT;C$byvqE;JK zo7iJ3VlG?gnR5?5g$~z7s6{2Y!;Wu}OQ;aM|BW6f@>4`B%B|VtJxNILFq2e0L{EQ# zGszd5DI}`Ny=Em{NJWyQXMpDg{A31R#YuWwa}q9)0XTWma~g4FAXW9e2kSkzFUK~+ z;eH^N{+dqWvx}{Vkz+*egQ$s|SMjh%fzQp*;X@5&A|la{2jTUt$<|bh_FdYeXCPKX zuzroia@NXY&I`RS!McQG)Xh_%Kv+$qNA2WMJGHO4I1j-;tz25{;ivT$qp zojTfBSIeJ@nPB8>*f2Kng{Lt$KzIHW4{DVi2JM7>a>&yJm2*I6ZTl>+>ci)1c^E=4 zPNgHt;VTMlc;av}gEC)=dJWDytV?BWN1J#GPGcj&U=1UMu{z%8_Ew!$;q0FfmdVd$ zN(`6G!-WRUGTgofE52x8j}H6+AAGG?=pDu~8qj)F2kR2t7fcJ1g0;L0GYdvZYW+4t zrrx&D3e<&U`xH_T94F|F=;)UuHjhg!o7-(l80G_=lk#(lXW;K ztNb(goC)U3^v5_~cn04X!C*(cqv*rUY9^7wv*`d>czCU-zEz?Y(cekryaj$N6Kfj0 zqb8-Vc;HBeQ;jihq~{?-FdV< zZ7sY8XX@Sg-35Ok$u{EsXA_wIu80b`3L41Npq8e2sm#4YxL|p{5k6o zd$l`X#{f~@(;01w$r~CZ6ES(YT;h=X0Zj!A@xSNF^hFr zmB5{8tRxfAe;+r)e?9AbiRS_nhwAsE&NDQ7wLXSeAQSU;*g68(IE*KUL6aUp{h9iM z{$Wt2qmAzb*?dIB(!@JFaQ6ZDzFPDd>0nMOV$8a(?zUAPI2Fgs)FTgl|NY3+-!k6pG!1qSpbgA?jxQ9@zKDmXp?z3}m2> zkN7Dqi^VGKa~`|b^W^97Rz!5BUgm(_K~jw-o3?#K!+SbN>q_Ut;oim^{mbEWM+#Clmyf zh4w%2jPeAwX^}T9&fxXLk>`_qW~|!Hc3^SIL;l|=&gvQ|b^naO0;tTHQnYbeTTb}(FDFS&# zc_#fj@BJF@jmYkXN~XqEhvBZY5&*Dhrj!}7&;-Eb1rU0i630S5KNwM1s=k695+SCG zK3H88!zcKvPk4I=&PytTz<<0+uv|GWpt@QqEM!3->mWp8%MHfDGJsGxfVtRELe+sR zaqf0)Dxy9vH)9k~tf}2`q6-D*%;q|xHuwblpI52|bLDxAAd7 zg#&_|y#4@7$|X=_Zp|w(OGK@FejcJ2eVgVGhcE#(jKMFFKmo(=7)|gDaSQZp;wM*V zy|28RHw^K2q5>z6oG9_00VgQLN4%%&h=F)de;3~}8QG7T#!!po%F%a9+3<^B%a=sZ zQ^H=JZBqD~LSzJevk)_ZE6D*rEajuMAN7v$2R92onUfw?vhd%MIEvof-gpgqw_oIR zh)xKCVSEt30x>F$2v=n2N>!oRo!SvmpF@_T6aX23>U-J9S$N*$IiextIk|ZQ8$ByY zBzcZ4pyfH`zZ{-}_tX%h4*oLNd;DAjpJ1Syl!M)~oBW~*KnMZNu=0n7Z&DQ5c+s@B z@e!T%)b{4f@?ODVf*D!rLH#J-6Wpn;x)*w*F?gxVc-iw3CKv|;b|~%&#BkrFIv&Vk za%KTJkV-m-hukh*b zGm%LVtS(saMcfO@Phf)?~LFgUYa~O$XFx-^p3haQ_l^Fl-!iB|H9pD7S z=|J0a8JYu5HjmQ+O($f~1C&D_rdo5pJd)jh7s8V_<~v-UV^vme}uC z_)9jrO6`t2z@|p7{xT8VI6XbJs^O&3>p2)c2B!MjTY%fxfeO1Vgsr|n!3mqTBjOoc zzJi&^{v$kp!YnI}i}SAh%n4~&@UjwxF9JifuHL#l3cs**9N7Zxu^~v{#>7O6_4-c{ z97L}-dJsg8`xIRbBvb)_nbcn#Mk<0G2w|#E#10NOR!B8%@}5BAF+BLO2ogcTat`mT z;`zWqCXk2wG?I9o;$yV(DO#fimn*E-tSIcbW-B#B{M*><4WF6iA2t@2A3f7bZ{XX$ z_$Y0zHuefATWuTwI0{E`Sl~cGV3Yj`OfIh^=mlE6DfmR1VLBsCo60-B(2ouwF*8~iHnpE zH~{cUXQvq8)uZ?ST~JclYt+BT{;@2PvL(_qB*Rl3CWTDxfQ(M31HV#xKQYuE;VCgi z#YY3Lv1jZzX}{R3xOhdkueMJtIT0$^9c+9884I5kGS6a-@tckD3rHRUQ20sDZ5SR@ zY&gC=+>vkjs(KXK=QPHJ?f4rGm@bx+s6Kqw2z$UU%j|}hm`jK=l5ajy^}z#}lW3)J zmCaMtO9>;|fSn=`lsnZ=sC^SsTVW+RiL1F%$a!(4Q2QtQ=M1+L4?-;yrRU@7k}Wlz z=IU^PaFwa9@k>Dy!}A-A$Nf;24MlB%N8})tC7r{n&Jd4?WkncQ>=x3}v6+AN^BPSL zP-x2cN9r(G04L~;brk&oh=3D*O=O@nq>%c!9R(Z+xH$L`g?1C0Q(5s$f$oEI#Lm2& zTv0wHoG@O#`a~JPESk#@J;db&DN!up7gzdngjkofXxcNao}d@8v<}< z90HlE4JZ2g9Pv4K6u>sEcvV4bDHg6Tf8sck|VGV*H_Nu=OHWH!QaHHNWd&O4~ zyxzx`e&AA#(ybAMte0CAe}v!u-Yw(lsIZvctJvenrxCt_d*iQa^$C0>yewBGUU}H4 z)vw_BC~{8wqukY=1};3Cc_s6T_KN;Wb`AR*!H2!LI|ftIxTV9uB?dhaZTE%dpKpAZzzBmdboLlHWy~LCFgJQI(?1}N;#1tw9-sN*9fYDI&nS#F<5pQVVVj1_ z`C)&WpEH1&X2!>k`4WErY3%P)JZ~VJ{nIe%6==%yi%7>2T0aY8)&#uTX3;m(pJlr* zemO4b_!HJ7o=&{@W%mEYFQbJcXphD;7@7PzjQ6NN2Yff(_z3`wm~ldGC)yXKVJbin z^M0tk7JGyMMhwFt71}GN94<_8q&7e4_boNvL}5F7yKH^4W6aZQ1Xv`^zYx#J6p=;T zV>6<}rHbL~*N8EXF^{m4iWIW)P^qTc_!hMtBFdqb2hae9Ejz00!TzzA3Rj4Zn-oqJ zO%V!gn1b4&-~oXMurM6Hkux4 z1s{ePZaCQa2Qgw>40}qgwd;{2k#QoX@2Y~b8g6g9C!&7}S-@%h+!_*~U+fzG zDs+(eT)a95>fhBt3ZSLHS(cxSZ$aflzc>1G?HYK7kH3LHa04}b;+DjVuFgc#kVRn-F#e&*~P_*ae z5Xtb9MTJo82y#?VxItd6r$B4JWjG&2Nu~Dr2+zKowV6zgAmJv^u6r94y2?i!-_YhR zJNyJD<9&5F7wYdK&Ch7PBMG-awqB+~5rGT^Wd zX+%6XLsXlabj5Aw8@VC z4w9f4N!f~f{t6!?`s$zK0n-x?3Pd77_jG>-on;A1@q?)-(Ugs1l1(Y%yBh6x^*c`2l 
zHcORKbF4Ck=V>R?7;jEgCgeKXm~8H->}XC^rkc~0Y01kqb~blacImoyS9ZIF%{}hk zD@JAC2U1SnDZHF=3hoUrr7JhO#>!3nb4D(kPSF{CIa9gWIpUftxAgtJi+9f0<(xC_ zOuU?}Sg2>x*@1d?xS5q(QPY0pOgYoYnbtYCIY*_A+vVL|m($K}XAj=J110x5`%rS9 zdmG-o6ZbbbH{$+A{pP!obCYv3a&Fc+2b{b6`orGbayjF?%dt@7jB`)lQ`O@B%US0( z=XN~5tM6&}?j6pZ_}cfNt#>;I(AELn)+};noV$>7m(DqeocB1h$eEQVdMFnY#r;`WL9#i!S1(!uBnx6wGezUH1deJaR2dVFp!Fpr-^xyd)Yw$=m&VP*`19Y3#1-HJ;2Dm2*&qPn!_#gC_gPGP&dK9GtDhE}5!~hE6NuJXth&sEm@0RyJ^jL+FotB+-jAd zXgl48J8!kx7s_!BucgqvjPEuUy$SR|k~fYYT5dPpL+f3;wR~vJYcF|r(?8_5yI##b zw6@+^Znvsyc5T&Oa*QTumg`t+HhS)29{f4^ zvKiCcf%?3iOrr1grf?f%rIvlTRNXZ<_73T;6xuS61Rt44er+$cg1L0eR}EqpKz3&1 z&PVG$;WzH-&`zG3F{EpA_tzqKQr!myKyrf2+*W5C1YdHkdCs{OrG=5!Vpxe zL9trJ@BjpGU8+{kb?t_F;!UHnAhXbJH&m;;`&*T9;t+WyB)s#9%?oMstBleBQidTrzuR$LytEGHy&=N;~OZ8vkc{__vw9 zl2X?kL;FQk`vJEFM9^_9&uz$pwrUOA_pJ+VeQCMlTMM?2S;x8|RCrt$zT4`cWVm9F2{0@s@UaI4mK7Z>ZbIzASyh3KIk zf35Ag);!S4{49ors3?3@2BP6!bY}NH^S8kZ#snGgI8Cfd7=kH&o2 zG;|HY2nuus*(%^Az=pq3T<~6i_nWxrr8ZMnQayv|YT7&M0F-(C1rQ%97gy;oW-nzs zMlbu4@uC4RHm{_d%xc~{jl2w=XTNCjNhiIMS;?;Cuo$qTylP&`IR)h9dqyv}Q|IkW zUCQ^;&dBPB_eu{}n}sW>`F*2|-YVry?N#{S+X2tQYtG%n;?VzNBce@2-jNPi$!S35w$0pP|xKMA|-n!MX zJxpu3NUpy2WHzxz)TVZ-(Y8Bh-4?(!g8Anju{*WpS?k0LvKcfi-3zrU)Ox?p_oc93 zwwgBk+p${MWPRWE>gfLhwwacDaJk)?jhbHQcC2RC?^r$%j=#8W`E^n!7co?t6i}T| z&>pS#t9;7|Ecgi1g*c8Rfos>6xz7>JsSgbCTD`g?-V8dg&GEyrD~lR&Wj2NZHHbDlfP59nK>&<`GXa@P9D zjP*Y2;PYp?&FUvst>>|8JbuY~^1$j0Kz`}KM{qHtD!2f;%eDB1MP%#LeT+HQ39#CN zy-;t|JL}Bscy+)D_M@l|(!UG!&NB9+TD#fA)cD=CHK1PW@n{4WQF#lWD21c_=%C!5 zab1~ZnY4IE;4+n+?*S>b-07_O4;&)-CKO@YTRP;lYoOmcq)ZNhLR{`N8@I_E>XG6M z$;%r>t~>zrhHGtDvKr9UHB?WcMmn#SmXrvL;!N7R1NbsX``u=c!D@FmZnZG(Tt~JA z(xmPHvNhth6#SIE99{uAns(~V*Wbo(&Lb%yHF@vB)yz0ZRrDb!K4Uk!?n%#Ud%*4O z3oibX&%zGW3W|%}mY^WJ5sYG*s(yR1)3h&29YNm5?&P8pv(pZ8j(Z-ULjrvb6g2iL z6l2e4M!h@P_Pd$f#^eB#86+s=FI7eb7fJ2S%7ofeF4TRu%0a6X)oQJRvi zY-RM*AmS<|YyzXf4!<_H-joG(Qepo&HDZ~8aZ>v!iNEm2>$Lv!xleae*b`GPncO1{ z?5n+0HH{rBRn7F$NYP0&Y1&@_7@Saq9;lQx*AsY`ehkA8z-++Jz<)79!@8}tTPtd# zR{%R75zS}GKDeHy{cp?sKgveIABBUQV#=Mgi{%SvYCux>H60w zu*gVNlZsWf@PvaQtMdi?DWY-DxS7L>${8CsYys+sF?}r?j0>mg4{n{JDNi)EJ0M5|!Wb*KD|;gkmC*AG1addVUVyId6ew z#4Lk;Aq~?ag{Wt2Yr6vR_~=Nq)HjXlsZa)M5jzbEktDPb4d?5&0MvXretuwPzP}Qo zE92fqH1i@U|GuIi=xm7IEPh5AJwMo9qcE)2=sJJ_(#*%7d`uvjM2nDPAg-b+lDs*` zL}Re%`88?PAS-}-7Ko>$*<-i4;5yzREOzf8k|3vG4(xDDSqM=fs7ok|th_TfGveLN z*Y06*FW=3FB+U0Qi#xueN1|6}#uOSn$w#M{e1ORZkpvmX?$|+Ib3#G>VpW$xLKrGH%l4v<$@4fStmzg%gqC0r=9#|U}BLV zk0f%GVgcmXO2Hk)ztYN>Q-b(@%o&HQd0dFG3E=L@;HJk~kZ!KEDGh74PJpcdivdny z{WJ*9FIT?u^TB7|{KNO3otr5H6Wtb&cQs;0AqeTaEjn(^UWZV}T?|TOy{a9unINt6 zc6X@>GBqgaoGOKj!QSw0p;h&|Eec?(U?w`gBK!)8VH&|Tc9|3fD*ghJYY~2s|D~$j z+H&4KoxEebWPm)(BCpUxZk0leRJGU{T}<@~pCj2=f&eB}9fSNZRULOs3{J*=0bS7I z3f}@N**8{JVz4ZZR)0gj5Zt1q!Ddg-zGRP5x}9L!XxSf zD-)a(O_w5h>&n zt#nZ~nL*d@;On_O6tJ9$63!`JHxc*+yS$eV?E{Ihw8n=@7O*xcZ0n1+0suQ3M)7gKS3uprY4W zAU+`!*wEeMpaW$4dU75RWJ3m0j$wuK3X0{F-SFL6@<)Dx1dQ8>$6dcbB0-n;-Hv>| z7N+?f&4u z!4vG+Sq-LZHo1HEBGNjyoER%ZEcRFv-I;Sc@qC6tE!(?TKVPOu%UXk*lgyQ@k?^#-2$QPb+`{a7e3R&laO18(WBjtgeD}ioeg`%Kcw^=_3OznD_L!%lL zvl~t6(ZfgXp9O9I(fauZj@-{ty8FFH?>&F_tgOoN6Q`q+&(v3K-+IQyvYZ8T4=MI( zySXrHeW<(KwjQ@zExUmSA8Y%|UE6xZZ7tQ?@n9bX{hk#a-a!p@IrfRQ+PuD#wuO*=1=hF--IOfcqkB|P^2KDkzBeYLXwR;`pBmQ^+e(+ zmX?@`@z|orPosLu|4!h~|Hnu`+@-v`kU|o&TJUb^n49UIafQU3aqd3o)iS&@Qms<* z3Cd@^5Avw(2$*%TxiH1O}(%e+OH8PmWwN+FPYx+EYmZqrE_v6rGFw( z+UhI)*`5i#8Qxem{m*vBUNp{qR-0zr^|TNXg0}QLL_396JnQ|$pPLz%S@jn2sX?X& zC0S4)Xv)>@sv<{15*yYOG_g?AF6wvAeUQgGfx_Moigf@NcZr&-IG#~abIk#%9dUwb z08MSRp9T-ck3-?%k?asLIRx?oDdY+RqdCabO$>DCBapcIvnZR&=8R$*4A9$!4AY!W 
zbC5k3)8mjr7t`whDRX1?>LAkp&3#rLIktp9fsP6;Iy4dB+#=lrGl0E>7xN|2YT_!h znH4=OQslvC0CT<{(ylM>TojWRwL3-nLJ&)10CR%WA zM*Xv@mvk#y@YtAyYK0QhQizH;X3KVWwcY-7d#PRn>E(gTfM8IDGyVBSj@V z^0P`v3dOmINiSudP2HRNNb0H7M^oTLO(z3^YHBk_paOtFEwK;ip#vf8<_G`~3V4WW zq40N%2T>ytmn!oT5ZI+$FSj!4zt}0k7=XO%nDDXVqUM-Dt&_;jKwVEE?hYphME29j z-O)?qT>$nrZ6W_b*e0sy)lsx4C0{Zw74Y^H1jG`S5Eksk2w%Cuu1gDq8cx+ewz zi?mBB*k8oDI}37*B%&gDNofLyk>iM2-H@FFMIchH2)tODyaA-czF@_^socV1C+N~A zBcWha*$~o4_367RV~oC93;V&&{Q1eKqsF$fcO-4DUr0S5*8TrL%pQTowK%Pq-WAm= z=4*S43Jz*qUQ1XM{5qO|o5%B5NH!nJS)%T+3I&$2mc*yYnaE)*2vk9R3*N?}!t&^N z-B1h@^DQC)(p38O^*Z^+@P0?!%DifK+3o9deLIOow0-f*_=^Xqszt`v9Me!0BT8D) z#0dW!zE0jm%QQmi6aJ4CGco6IwVm^LW2p_%*K!lPCyXoVYb0(G?A(FFGgg8?OLDyN z$R9W|1GJ>{S^Xk1c5=&lB#R7o3eOQ$VUSAkUu6XiocL60EJ@NW1_j} zE7j6L6$BNX={)*|ouHjNn1b?X=hdCNP;ypnW8Lw$Is`lNI!!e`m6LwGmRQ!~YwMpA`H zj#P5|&g#1NNf!P~%t=G85R4ML=w%TUc*Bt$q?D+@_#32@K7@Z81p=^&lwJ<2}uUU*wUlPkgFyqN0c)U;KaY=TqTT z$ciF{%9yVy^$S%n^3OECIysN;d9(B$NpF%bhOS9C>o4PF{C?y8a0M{V1;$yWhhUGx zUlteq6Pj=h7gWQ;;PF0fl^iO`!BQXxYIsnG&?sy|MGx9A19d#?lF#t1EZ%ygmpTJR z1#}^dI(x>Aq_#jO;!V(Uijo8ilEb^^%_%+s&6O8B=BH@-q!j9)^2LG@1Q+bTAOj?l z!$fFApji-EP#w1^OC8E?%XU^^YDgYIAv2{4$gGNtwEH5nc|2^f}nE^VgFH**>9Rn$Mkk^EEfnz#<0`{viooZmBt8KnsU|N}cE^3K`k<-60zY6i zpxTOh_*|s58_jBNMc>WzG2T%{ddRE;C-m@is*gZlISa|V z-wE?-?iu!C#_G^VPDA9@9-x!SwcyY_3Nwq|FJls#&@U;y7diU2=!dB4ncw!K=hdLR zE|kHGKsRT-Pa&O~$p#Yx%TQ2ZuyYtZ2R9@}dH@i&I%W{PaUU0Z0#Y8KJ;90zivY-B z9J6lxu=zQI5*~^RUo!l|PWn;~rgJ%Q%b=J!$GyD@Q6bWTSR9Nvv^yv+VXv-^I$5Bz zoc)b&9f8T>B?GV>5@UgZots$$yEn52Msi>hrNp;`)L6l_5F5)}7jk34t+WGkx1k^S zh`(WgfiLYtbS(ERBRhFHCNKuz3{CZuJ1qVkLVB#~JB9dI*SCQDB5abR)L2)NTI2nI z?)v()l;JM-fOXvZV4G+VV(#N>(AUvd1(@Uv^>k-q<>cu>cNu+u&`pb0!EUZB9*v3MceA0DtmS$eki0SfnTL4NApU^L$b^YvCCL$*_E0?nK-nj5>quP1>QY5w3_ zE7JHanm;0G64@v4=hKUBDFuBSQ0xlKx}di?3o$p?uze{C(aGTRtQky%Wdn$xii(n1q z`7ROO|9)pTjnh_kiBBKA=vWTI{d}*uNVBwpjnc6rtD^WRW&$KP)=90|p#WaPkfP-x zfW%UwvBcI*MZ|ugJ%>nUpb~n=BKK&oQltxT;ui7ueo+GKhmm$D_uIwbUNhA-MLSB!gYg0RV8-0j%$^|Y*{vm4R!5MnHNbi)u}@Yc-)g~#VCEC7CK3Pz zIr@vV9OaFag$lK{>Iokw%fVvdzs=-_k&uUj|0*aVjBk*xt##EJB!>lmAkm)^#4Dqo zeWAM4g()+1vqrsA1a+q;t1_W4pdu66FUU&4pp>9*{7PvV4$Gk3gxs!7H0rBvqb~P! 
zbE%Akq-AH2Rh+D90v?V=V#W7)E?RzXY9eEdn?+$2b8v>uKsrQuQ7%)$l}Jk70uT9( zTv1&YbH((eS<2+_PVp^BnDRws?D|$N2bQw_vFpGtoT`^w)%?7O+JiuS2jt%PnSD-!n49h`}1qlq6A(jywCLHs@ zR6X?>{6W{1rWpW`$emco>D~xjBH$MBN~$*kjh()qNFletoO92AI`wH|QH5|QIE6={ zyMuFU3hH9I(2l+WN8PBcQg4)Z&PcEHDfH~p81BjRhWF#@d9UP*$ny#HJi4FkfSvA* z(RE{AYKfY|G07Q64(gGdaT+KZu2aA$9)>S-2V?~EOT`7K6dy0;Uq zzkFwk@1W+?=5%kWH;GnuiJow$QxrYn&fYGRo)SIbxc#G;*a!h#AHn(;pt_R<5qaqh z!(88UE*a)2a}_jARIS9-kxR~1TxGNtGN!k#MQJ8fO(Hz_p6>3Q>Ko&^OKKjYwaWRU z7%jQ^gd!k_yr6&z35EVw#9_~xAK*HnSq$Z65U0>$G$Hp&%w?$eyVi7u7$1HjG!Li82f5kosE z$9UEJ>*hJg{6UYfG9@WC74eaVy#6)BttlT4jMdl7$}Vo()o4nz1%W4^B_NmPp+_v>aVLrc52AmshV#!UbAt6gr&4>sE19Q=pMaUI@|;QhH+jP0$n zn-pHp>{1Guzsn3N!MwlEM93tOS!Y(=bzkYoL{;+|AIAigXuHbHqw{r8l;vF|t)etQ z)T1fq>{uJNJE0Ae@0OVnw&U-xlHCbNOvoB5J6nlyg9c;?+*YHqt6Lj3pg)0{5H-Z^ z=n<}rHR%MQ?|g{ym=BExJXfn1;rlIVs?t^I}l;xMj;Dw4T`%-V`r{Z zFu#@qVTUKhapX)w{y&+`fKUURyq(EKS0L)>8V=G;lXq13jc3Qr%v+ha%`DuojA9=3 zP(g^?Ej%3g(P|ZB-RIfQ7m+A2-ptg11~AB1!3@_BL?{h0;-D%}z-WV2hyqY_e*%7@ zHW3|RmQOhl+v@{Aho|4iVY&+!1tW8l)(C2MEPRA%etq(Os6dR&o8wQ)XPM(~czmyh zzrJ6j3ZYUnQiY7dF^le5IVv&|S9x*OQfd+9s-@ht40ieCXPptJ_;N1c zj#Ucch*9D4eO5WF%X)-9H5(9AMT#K+5)=V3VJI3vUEdvo3GkkN8(T%797bRd`IEQ- zp$K$4UwY+IMVrbh_+KEjv~7T_(;b9~Hlq0`W&M2EdGhz63j&4VbmI`0&aJ-lF=uxc zN}dbaOZbL_lXrajdlw4IJ)Q`3!v0DKyXskn24m}JcE-HSJjc^n#MgW`N zV(&9gdDGTDMs?z86NcR#{DIpB(pKI(9cw@M%Gm6GmcGBzoBi}2eaEbH-%NuXO%7|+ z7j9s1fC$Zd!|@q52k{ukU=qqKp~2M>rM{Z^ZFDXP!-8NExzZ&Tnf9jj;p0auV=IiB z5t?Q6RfHBpuY&M1l=3Ail?2}JRNdTq*3&eA!)tUls_jP~B;1ogSZx zha!qbbI@qF*9iNNiDJ6&O>PV87OK4dxyCCoED2cy2T>)W{8^wBW!3S|c=dBkexAuc zM-pVEH&ASgEM!QDVPeG}C6P0Z*Hin)l}mv^LE^@(QP_uY<}@yfM2fKV`5@bV6iGzI z0cVNchY0Wl{(KJ!BnqJ1pvi$1?5pYL)08Se*hf(wf^B)T;B%qnS%FP0M2N^kSOq?V zUWm>Fu-J{CWoR)AvE#>)=KPO4w4g2YvO6JDU|_C2DIE?gZT|p1?1WZrho2ye0+HIn z5yvz&0YK2PZN*{Hv~e!g3{ef;Z6<*1`k3vIKv61eK=~G#-S;@(?*`|~wanjNfbc`F z@L?*fZVNaLT6A%+irr%c&yx*7bO0ToN;Y@FJJH%a8wSTwU#8Oj-SAq7@VZ$0B>Zy| zj22V^B`brFL^0>`g9v_|3vF-J zI)Ks);0EA1B@7Sa42?Mmc@P;O~Zr``;%hLutVE1{2u1;dIm>^B^Kdb-v-n-cG!Q05~F%T#J;YMaHXQy zipgzjK78wd5+H%#m)MXq+QcM@StaUlS@3K*5%xLSQdxFB@nBx#WDrLr_{hyj5EPm~ z#~5VjbPppMgTFM zbFV-xo?T1w~}$fv#vWAi<|+*qNK87bmdqE{u%?rjII z?1OJ(r7dK};6&RKIB#bf|c_P5vwL_*2z8sB!dLVn&)k3F1UMXAP zF9X+zNDvuBNRmJjF{SAJUtrcWlV4;)hdIUzViZL+xG;xZRKi2btbzAjrUG=sR&Zvw{Vd<0Ka4?F-3Zn zHs35D0OBNMX5yJlR$@}4|A=!?^3QC79*|x9pP;U-q7=5p?MbZ3p2Dt4Z$P*tABJtW zAzHPBZt^p!C}tvTyG4TtV+Q*CK}U)3ZJP+=y9CYU6$JKKrPcTdObK2=J_EBs)&z_2 z5j+*oLD-|ERv=S?yD_3KivBOx8QUCpN;D~GG00mUnn!vNB8+cBKZ^IzqIs1T-$028 ze8*sm3^1o!%5#o^ zWjFY{3C2|i&$g>KdWHJqu}kB<@vx)W@#^Ni!^S!)|>5ZzX22A3e*xmjPomJvcZ242=tjN&aL+KRt}o|{i^`!!K( zTWwq3+2;(nZtC537$2t}k!J8*?r4CJE!#1?sId1Cz#{r4f**EF1+q!J2P#Gd$V&ia zNTR`nnxQ*Xbe_IyqIPF3#=X7}3NS>Pt9vKChzioS@ES%v_jIhy061Z*Z4H-uPFg!%o!og>WgA?l~kURjGB+8R|C_0Uu9Pz@8Ul4 z`#^oQ0IDP=WMt-f(nfhGnTgQ+LRE$`^S%Z(vgjuji7TRhhG3`!^bvX0S4(2l600vF zUxtW{RL~Fs0((nTMa&vx$uJrxM9vHSH*d)sw9xHqd8;sfK%#yva*Pay;Ky)^PYntF z&JmB$sv`LqsX^YOm3urlb4oqDj$J!6&B1n@`;L&{TOrdgc`&g6^e9b zT1l;A=}t@%E~9j%q;^|nS63;i(NlMU*?{9#_hMxlzTwpICs$HnT=f1En;TU#SEaM7 z^biqWG}wy&7o-`u<V8`3lCA z*_Q)%&?)>Ca#LxfqC1|-QCkchaC$F-aPEwpLlLIagsvC?#!GKf-^}t!`Z*Ldol=q! 
z#OD-RdWb9NPqWJ+V=C}U{6T+)SHez+w28Bz!0K0dbsf?s@*{bE{VXn^`pt>F>5YUV zsA+y}@wzW9yB~??^jJ&&LJL$C{F(>mSjlF+18Woy6*!Q3!Wh^lygliMqSke*#I#% z;NPKo9Bp31Fk5ITqe-h5qBi9;oV6&JhQe@q;a?3^4pfNX$*R-p1RYg@sw^JAtM>4* zB~S^Pl1L7S9)To&Px>3Q;MsMxeuh=GTy=sM26^eAGM8O$w;k2RILIzkQ+Hz;71|m@ z*W>dRg1oLPC@^Y2#1RWHclfU?{ok1UcO;cG`jbHL#OScxRr9M9_JqwMvQv9=^q{}aAnmScfgr&x{DB*YLC?YsL9a*F6QH(<{RY4XHuMTk zZ&6Oi(AWQ4thN~J!b8oh@Km$!DgbV6T>>urzW|EQg$xLYy{+9$!1G{dF;M_U(5Z>w z(mvu+HpE6i^ys8{&bM)T1%yzD;RI}n*l#{e!M{y7-`daCb`URw)qqpZ07f~1m00hP z5r7ctd>zg)?S??mc$zeGB$`kKMm; z|2;>K+;{ik`|iKbwU610HRsqp?%^Xx?Ze09#Y26|?NBxRfGQ69kyH0@F5GRW?>i>_ zSB}-NoojY+bjLDG?xDS*0LJ?bCL}wytR(NZksGApXgRn+)$(JjW7R4#)lP5%Vrh|zCl?dXw z2X!g;@Iq4`2J%{Z03!_a-uoTAvk&+{ojaG4`x^54epDdv=Rb+0Pk(}-1&A(f;_yY7 zGw}cq+-J3CH?=6^l@aJz=)H|-(kn1mhu1bEOlK=&;4po^nCi0AH#zGWg9K zk7#70!jvQ*hh{2Dt<0S+3S}nK8}XiEA4q488FV|CBqv#2aIs0MgHkZZln62!M=haz zVPWt0n9yz@qR=^690&eSSP?V20YwU(IpI%)RG%S2lK`B)^$-I@uVlZ#^SS5<3Za@h zp9>|ST3LkYf6u5RaF|df= zyydTNqJS+!olW0+D3pLD&44Z2z(^8o3|cs_luN*%i12F7A5a9)Fm(^`m_D;*>k%cN z*xv$kv47T*kURVzrW5-wp@-o+abz5Rp(c4=P~BlL8Uf z5_zxmF}&h$CNt-eLy(O^aLgOi@w|tyWxY{iWY&5MlU;YpO8Fp45HkJ@(RrM8{4Nt= zT7I8bL`Z_Jyd%64G(?JzM+Me+hNj!lA%*CJ8tXzrJ%_w6HN|L*4Cc0emv<8P??;K$ ztzrDtf&n{VF_Y4MnAw~$g%F$@H$K(3v`!PM!Xs3b(jzmd^biy(yC9puj|S%?KOe`R z|2eQSAUdh)(Z3^tY zV6%0pcxhB>L(osrM~wIAt_d4m5YpK0@IUknP6^~Rv@N~d;1kq1if3ccx)fOg{$45Q zWlGQsj5=elrlDmZ@*iI{p;>qcBL%GkJgCN7rzL-a`TjFLLT9%wid8>oB zM)X^|ogI9F?PvGa@rA*Bxm2lgAGY**7CWZNGLRZeai;zr$mYz6**9 z^~IE+52!4@y&*);uX6~mq8b@SaH!&C>>M3ZIXE0!2im)e;ajvF@td|2TJsryS~xyL zY+ma^4BP6+L8oEkA}UvB9*`=)CZ069n-A^mVE?%w>K7f@NHs-j0pycJ8w@4wAl^s@ zP)ibqZxKl;*ZZM^+MdwoLq{&$3|;laZUdIqO6lT1!A`5+Ua2Y!WDzs7`L;r%g^AbSzA?i#;H3~(GJmMVp}M=`OL>_zzx1W%I6 zmYGpB#ssUAL{t)Iar**kj0P@JcNQ|c#nBtl!}77mS+2Mja@5fOtbl0Wv`P%`bGghL z$b)AR<*nwMlt(4j(OP-lMry>FPC7?Sb#kvT#HtO7hf46FV0wN;J1sy zZ}Zf>k)UuILC0F1(?{pLF}6nV4u8A`Td90|IEDQ@u8Kx)-;81*kpOjO0;bbUUpbgoP&NAK>qt$}omq?_3H#|ffYjugtV0a)4G@@agC<*KmA&;mW z*LAnkM%*ya#~!$kr;eY*p%?ZlZPJuJ`tj3<|AP)9=b7UtLxCCB{(vxxSpxzsW`!YJ zz}2;&55gKO!=UrfnzQ)+y7Rs>N0yFt)=qxx(KAu!=72!awUdEAdz2T?2xRtjF=^GNxf9S8QpME6Ehox zHy~VznM&~z!WncRydpvD6h&Dv+9RZmZrsxcB-_A;zsA#oAgR!SzyO6WanDE$2o@P_x}NM17&C^1G=)Smts=Ym`s92%)Ty(2Lj z3Uv%qpPuzT#JXQ(@_8iG%dPIO`oBI>Dm~$$phhM40w}viri#jqr$G#%n~FRymtq4p9KkWO&v|M0V^0 zvD#DX$jG(2{>E$wm^g8u zQ%B4?STn?hlrSW8O2dH~X^y;vT*)iaPHR=T0>dA#`F8?;;yo&oK*ivWcvC&IHOVf& zoLVyA00c8?xb>u}V1LocjQs^b_dDrRf{Gf@@NI*_RR1AbIt?`X+zIT1^u&81!dQGw zg8Sg)QVPe2D=B246vK{0ZHg9DqO?%d;rG=sFPiYkBN|5*g%RjNa~5r<6S`8)Z}p+< zt1yx-)x~v?-vL6kE=Pw8IlX|uI<)ZZ=VH{!7>MeK{RwXC*asm0gQ>1n7i}TFX7y(Mn!1LSW0Jwe3sV5*^+0>LicWoKU?3I(#Fmc7gO|3 zR6z<5RsD$>zZTv64h)Dj%=v2WrN5YXY|XG;Dv9Vb3lBDMl#^pWbP!1PPw--DbpKRI}VMT-uB=o1x`}L(JbhTS8zPYP$ z$p{2Jg^O^Tz-EAmQ`{27g9cYCSXYSk4G&TfS5PXoC*I`->#Dw zH6YXNy(v`nB-e3pvgg{+43UFKZpu*?xl;+E(s;Lv0}LBLoQRx8Yz!*Wz9_+hMb8o= z_5Mj4(w(>y7i_4oweCT8h*b}ENxBg?FbD-`La=yW&%@5?&Qm)yv|7>MKyrw4N~8BR zP@&!}uH?#RCL9rc9$Z*Hq+}A)cYRw3hnhd}X52V>1{i?jfQ4e=EIiM;14n7dl9-Jz z)t-z6f~hRSyUojCtMKwn3QR_j1S32`uIfX@<2hIZKzD-0hZG%#&rcC$(C905gB(9# zNFX`z{X`kXw}H?y8&lD0ju)->6V_WaoZ(xL>x1yaRW5vsIXzOQnT;Dl zijNSW1$Ua7c>No1zx}q7DSaLtf`Vt**xZ6gF!H&$<`prj;V8;Sau<<3m?yxwivzL8zX}a)eWnO z!x(_8h+Jo_^>#OOjXpj1DDx$!71P-}6V0HcYg7+~T}I$|n6N$8-+A+@7~ zlh%u}*!q{3CJfSw(axggQ#iO_fRU8;g>w{hO`#Lm2^EkhZaAh6GI)BDty^e3>HaIi zG6>R2@4?lZr75N!OV`-Vx~u zU`O*UiKFQUg;4hpj4;Gy6+11CTa?R|-9&iL5>Dl8dsXlmUj1S)4!=aVMZ+Eus|BM= z2%CdLT;PA%4svT%iWm@ns@)0=k`(Px#D|B!QCvi8m4ZgD#wu7;qW$ zltsL9=sgjy9HuFVR}O3ko$XU%-h%(Jl8f*UF3S3*$;*h>65p(X*Fxm;!DnAmep}v` zIz`0L9Hn^+aDo9X>@USR&`xH)hzrkF89SW=w{yM`G3?NVpU!NGa`K&@qP_7 
zH_dNAbF&}*|KcHdBh;dnDO{u=r9wy+ ziW<2Q4j#>XeRxI#zz|FmyO$CSqN5>|`HpT(?Z^zvu9SaQtwtB=wjebEh7eL?Xl)S$ zqB=(K*iJdfux;X#0gfB~HBC!Eb=FcgE%Jm?0~smSsh-DJi@g_&wTy!juL<{NVh;t0Aw)JVX zt;P{WWivQ*9gZE*T3`bzGd4kD$UM$o5<|@cC?G~)rSZ!Nq=jw?dA!|RCbmW!PGdlJ z*6HJC&U|=KjOiDim6%W*{tx%8Vp&21ceg4BivPs;^# z(oau8wBD#FV~f@l(J2P8ym7GaMFr8aSS6+a$2kGz<&9lZ1({DjY|WW1ZcNkRk8Q%G z4S(XawlSfn9>gKAT)DVWmbTz}QG<(}#xWXQkr&38C(&XVwpcqzhfFb|wH($v!4WuyHB@)KB3mu()xbB9t&m;JkrLETj`XDMcS$mL{lR1BAxGl7Ku^_ z(pH%}H}`HzB6t!R&mLPTFep6_bGl?;H-}{tB1)Sb`cNa{6{2N74B1#(!s|oMu@AAd zJJ}T&121Bic1)dFSQ_8f|Flmkom9{8Yzw< zR00V0Jq(C20zpG*OGsA_n!Xjr=Tw!aZO~7>|v@ zlUl*Qk&}E&7`{30&oGV~Y1FHtUQLSp7Z&&6V)SXT&nru{M_7D z25@>PUn~k%q7&Nmr#zAC0MZr-}Lx495Uy`oC(^U7g5O!Gy#UkMKp6H$bxrS zTbG`zi7hBfxz&;$2GikfIg~~swCug?>?e=}d4K@|FE0U~tgqpJWqKonA;UuP9DQ0l zHZ1AZJT75nEnVQ%2oo-C@7I}-&-GT2khAWb7O!A6?1aD|*Ipn&UfhfSc}zWyTuo2zA!01uim9UN9Eef0YBHa4<2v$bj_b^K@^@Fh zi@&?8nS930(^JjpAuUwx&2QkCNTsjZpYP}AXl02yzk5JtB&MHs=0iwx<9|48Q%RPKHf7Cc;94#MTJ|TKnG@)$@-P7_XuZL>eBO&9|Lh2n& z+8Ussg^cIKhI<;-l0W%+sHWct87DO z#O4Q^#TK#kQ_=kOhkG-dT(bx5E#6qC%^|NqPO>yGm@^gui2Pu&g z`%&ViIQdYC3Dm#IxIts*8M=Ws4m^m7gW?cMe1cQk=2RShpo?e3kxxbP1@Wvn3cQ#y z&WmFU`a5CyO=IdFa8c+Jq2W`FgOjpYs%6cY@w?S*wJwZ`nJv~twj_+2Rhqqja-m)k!@3i_ zRx8zOPIPiQ&cjyPK` z)+|FV)+-oey*65}nI+RQYBP7oP|`_W938vPS)DX`e5q*7%yUBg+}Uw@K!6Hz8VG9C<3v$lYAROIiQuW4k5c$nK5Wj38JYm^54%B#J~Q~3I}uo+ zWz7z)Xy-yV`If<>#qTSZuQ^A)lw2jv)y3c4-Ms?!eQR=@oxxE;xy5=}whKJ~_PE z(QY`}U8fUkyMzT`T7{Ch+fr8sQ> z^9wWerJ7|@jEUrG@s7JbonAM_DAo#=QN*N4CuN$I0&r@+F4XhFTSYmC(yAv^ogyYq z#-%Q;m~^SOh+c_0!7kVXHzu_x4F-uJoCNw_0wpkD7X76btBBJ-h%PA3C(VAP3-eK$ZmKEKN*;;$vuSu7mRV77y z0nfHB5@=})zNy8nwLXz-K4Cv0Qb$A0C#DbcaQHEYy!FD{L^ ztt)z4Tqokf6lZS$&l9tIDHL5?cXzPv+Rt$gK{-reWkiPfCX5zvvA2N7=n=j5^*8nA zlg+HyU}rann%nGcDh)l}ZuOLV%Nxpl%l-CtjEu|N(^mB64xu%7*3NOupQAR+&!P4{ zu6@AE*W77k?42B!;k{C8?xbaOZ2w+7;$Li|>w z@3H%6CNbgE)9kB<3PdFYAqB;LW3(2?hpoF|F*3x1vpcHi>G zl~8l9m9X~$YldriKi6kD7v=w~pDSczB;Y)C#9AUzi`o5L{%OXejrLyq={cQoNZ%Mj zZD}zOnD>m>Xb=Akc+1a+2_Ak+ziRi<+%)&$n?^iRO8`#$@ENfN?OpcnAZFY9>|wF# zezciG+EezPwzLtX4Iyp6wP|?>^Ow7)p}ozYN1uQNDsDf=ZE@o^+c`U8V|D?luzX4M z)O11bJz^{BIbdzJ_uCP?J#jzWJm}VNfNOZNt=5AamsN500sElX_CRkQDutSdZJc*7 zS7G@pc1CPp2-6dAVh7{N7NqXHui1yiu2Ug9IUBM&?9F2L1Kl3_d-_jk&1dWZxAfQS zXO_1DKc7I%Q)173HKv~Olgrta&^r;CK>CxI%V%t08AcP4gVwg@k=lT@-P&gLEbp*) zu4v0US3+X=zTP}y?Xu&`yH`Sh_p|o1sQ0Pz9`r3Dr|f4@)9^~jPS`QA7vnyPnK)M4 z*A^cZPv4I;k8-)a<)`hVb`-hyZ4QYMlsINTV;@1>G5e66vJWFIDsn?1d(iFKKesj_ zX8#bl$$NTDr+83=eRc}xpoqN*y>MI#abN}MLbErDgAcl!$6cI`$}Y^b4m`vYV0nmP zIRfY%2h2{q6RyqsIscGzMv(Ilk@GNPUe9t4d@il)llDo5Z&d!Gwcmcm+GbPQ5fK5! 
zp{Hm0X;>TuHk`B$aNIHO{c+WMH{S`d!F|HX%zZi~dRBDt9N=`yKDvC+K3P6gK1`4m z&x_L!)ciejFVuWaoUu>Zr^ML@Vex{EvmQOo>CNZu<6`u=kWJs`PKB%^{`mCTXMp{k zpqkIxouKcax+_}SO6Yf2$!TElX~y6laUMN6V;!~6V5Oh2Gc^0+0$>=E5|DY3VR+2N z)|i~M(}4U-7S5-@g(E!ji{fRBty{bTXq>J6N4Jjcs3R`-qK+{ix|hT_p!zIrD{S^8=WM@DX-uS%E`^THX{llH%o2zEHi|hD=$hozh zBCOz`nS$ zd%1QDO~@O685mlFRHu2$Is@E0W1qFpF{TzDMD62DpH5)br+L;-ET3Hge~u=aT)>xzNquYL46EVh*)l z#`iqFuYk%aDw~wI?aNGMFWXo68)?hu>??$O%~wJ7E@N$7MchXFD#d}*m}pMo{k%Q7 zd;#m|RSFj{(&jZ_^i})HJ-zvwebL%!zeXv`FXHpk3M4_m%8T~Ph`+Xc(S3i}eg*GW zmR|;i0iT8(uh^HCUr}{>B^iQah~@TzvF{{rT|i4A`=xst_@3)F>cjU9%u&g{evde= zj40bT=;?asNa)qj)zA&%%*O07^mmM(F5&4?fLmL@iWpnPtyK_r$&I^YkJ*>zz!w5O zucQ8|!VUZN@;K(P_GywGx||T>$qoDZEJ+yZ#aKrubfIKHst}YiWamn^jT#PsreS64 zv)THziQ_pRsg(eNIeqi74uv;xVyi9YYN9A*@veVndxPW$=DzHUbdtD9ja2G$2XO$E z#hN)=msLaHJT0rm3JxbCi<8Wffd;egpUOy*fU^IerAP)zEgJMNVrLqRi56^(A*E?gP zVgS?cTy3d34Y;C@>Im8f7FDt$Q3e8)VyjdIu&NQ~ct(L?4Qeaw& zZW%ty@~~$ho3iB649%sIeLyHTqp+CkcPnlk-BEE6n4AD?X|!XP8rLjls^H<^iEIT@ zFdsfRqgNq&!b+A#_Rt99&pu$9HQz!}uPCAFJY+F6>n`YBF~BOI1Cg^8a(*Y;kFhz^i6bEJ=a^FGxHn5U!S#prLgsK)i!mry+A}^$4^O zYXLb>j0Hek@uF!g2?%W=Kg!bVlZd)rQ)3C~8g+BvCjNAEibB{b<{F*@qL^sc~$)kSbH8Cr?BZL9*N;W9}Y@ z=H4oT#hNzIgt6|%uc}59#UuE#%T>*iICF^<9Yi++yQ@ot)fgdgomp6{ zqq{B!(>O}t7GpMee;4+@Jwnb!Fl0^&=h)qkB$`Om|^$VO1FNW3C1Ze~jtTl^|%LHV+ zU;edUrN6J9zVHRziLk&t4tac~2)S_>aC=urCP{jkMsBK&8=Rtr#j3csDO5a|&wVvh zji+fZtr)dAY>#i_dvy+_8UiC#61j$w{2DyIF!0lnppr0pfiMYpLz|rlEj`0(JJ7A*;3UdZ)UEseZsIfc?sJa zEhwxH(jWq1Ncksox!fcaW1UKX?#lmNRmC5lZfv{GI=5h_s2z{opmgH^?eB`91+x*z zNUPQwqdTfxV_4A{GmF^;6-8&L&Uy{J0gcT)xe&!^#4ccXBHPG-?3S=fvTo7ba8&N5 z`Fsk2(}9)i>l$Jzi;8r!4~OMN%Kb70q!4Hf`eSe*LCU@Cxrtn3BO@mbMS;jm7>>xj z)L7h{Uz(k*7wAZHLPia zVRe~9Nx6?|3Adi1u!3T&W8s;Qtuu~cm?18+FbZB!xtBBW@4lC!BPR^7ED6v;@&w*i@@@Yy)p%GUP#(K6q zaCffYSDjB7cNUdKEPmUN({uc(| zxh&2ei;6YOHI4=aRTs`Zk92D~FsK|Vv}l$ubH1E;IKq%2X7dlXdEdL>#Uf%Y+G%}pmls?2;C ztpfk5)T6}gQjJeqPRy-UpE2Z3PI4!`h`h8kdD+QvY9tIkv9Vh%or|NBOkmby3%`7r z|9<1Nlj8FtCc~JW`+=Vx{jtiI|L8QJ{VFxqtw!eKnbV1hZ%w5%DN~x*V^2TYc-mjO zY_9OgPDqQ&hpVOm7=Q|5X(pliR)@@CJ)dR+3aml0pOA}`Iqn`;oD_>x3PgpR?ph&u zih-@eJk_Oh&pH^!2b63c1$Fl{BgKrjrsPbEK>su z6ci~SqA(owRMxa>C_2J@&8LG1$x8;aj5$)c)aaOW8C=@Qdc7AMIB9c)$fD769$hE- z2=)V}V!6hq0|yGzB%9%Pw3oqpTAJqgIzr4BHiDO=7zhrPQ#iBN9~l=?DG zg;HcTQo!a9+Y1_2(Q;CkuS{ONbTNPa>Vy-8aSye{dzBNOc=4Q*@M*1|Aak^|H@ZE;R_Q3zO7fC3igV0p6$^Uh+e z2JB*@5ArrEH&W&dWsbs3qi(Vth}=LCI)9Siitt^1)C+OBOpcrx}eAc zuh`*yi)^miUQupX)nvCfusfO_6o&`J(8b5;B;|~7>o-a zCb6{h%O9YCHUfDILB3NFI--AA(ejfN7rr(zMqO{)shBrX#If52mbVI==#2uQS@bS_ z6as|JNl}C%Y)mkmn1XLU z#*u}53S>oPD9C&0xT6)EjzA6hgx8j%Rq{#F8!%JkgmM1K>sQ9ueoF=0seE#_2 zjEvqRVWh^mZuS0AB#v>!QWyu-p8hZvP3v8FBKS~EfRB3C^bcaO1jTBxZ^x4K9sG}J zLwKijK=n^zy?ua77qtyIrl>FYM;}5MEA@rH^>4&FDSwLoKZwO67&-d*Q6v__lY%sA z;nH0m6w-dH)SocTr>IMek}B$5I)XS7~@Kp)~%;2gmdknSTK z#Z1Xx$66Tf3y}Or5_%^c)JJ?5Wd97Bo1rbw``dztL401p$8->oHLRuEiF?p)H~ZS& z`mL}^xkom#zt9}8pl!DY2B80iHa5B(vy(!%;`YEjZ8;&rE84~o^r#!HWH|+k7_!qN zo4ABM0PBI+rqFVSh@*``HwIQAZpWgFbr$Y4@SM9)cy|X4qv7z?Nai?x*itd+^=ITG~``1dNJ%SZqlgl!B8a znf2RXw23ihWi8WskXG2CD!mQM*whXO@dT?S|MqEay@Tf}vFSt7+BU%yL(bjalooKc zXi+6To{I1SkS>rp*BXRb++ZxYm4vU6YW7%wfd<0m}lGJPWeMr`X5p7 zX#`Fbk{8H1K1&gQhl0OLv0)rAAqtpT!8#Xn(`+o4|8T)9+xwd|BlLh z=jZTWrk-S0&y~~Jioq;9+H79MK!(M4M*@<8U!#Wq6$Nx{CUER>&$=_W|0Q&7Ce2Og zW$@zQXw%w|-lg|~GiA=#YX##gk zz@M`1aSHDpBJ9Pq-6Tni7<0Z+-0^5DW%uD$iAwFax?B#m58Ng3r)UklX&k(45}axq zk1FEePuO{TpRb_}%lRf5rGaZ;69mgFx{_k#)h}6uYp$ zVCMbL?9lPInJr-^VKpa9N*86f7BB-LMvRWn(1R*F0@lhPssh=hZ;7Uun|2cwuFmCX z+6+@3ugZLa#F`{;RXoR8Yz-IW3kaG0Scmgi#dNf`@9%EAYV@$2nZQL19O;mlCt~L3 zJ#F6?phmCC~V>0D`Mu(xGUjQ4Tn3x}<5&SsCLdn2f%xAnY$@}!g 
zyvk3~6EQ6Erx3vWfK0?47~HTLYIyL^afc=bCZ8tYdxYz8coSErfMQ1>4K_*g9(oRf zJ&5)o@Y4o-fiMJVu4pa-(cB|hlvb+CtU!=QLQn`=xsGR_Sh?6@KT*B+JVs;mL9TkE*5nw zMo6R{B<+OD?qr%h=zWJ8wcX=F0c{dAY<1l=Bsy{NXb{&;dX_h>guD@=e?x9xy67U) z3SIeV#dRYt!*QG2dcn+?$UI2+eduj&5Iwl0(OdjY;2LkJe5fJo9l`@;Y{sfAHh_Rh zToL{U+WZT!O`19Y!N7E`RQvPgulD)3F+fIf9Q?Dpvf&{&xQj)>o&+wC6}Hc2c-KiT zxW?Q!*&0+?-%Pbguz`cliWvL3)IalUUgH>$IYNK|gP!q0IzuK)a zF|aZV9CY0yKUv-Wt&kC9@%P>3brYr0PB|D|mu2aaw#$303r7($rbojkqq9bI$q4cJ_ z@xOsBU=kVOC&GkXrDd`~l)KBYrDQ;<&!^><2*KZU-pgDd5euya=Cus-z1~)g$1c6s zb%`zl?3&F^Sm^oedwnhOffD{ukKXHP#k$vBbfc+&E8}9zKje?EaML)0QTRf)L*7L zlHLr$M$1xmtV$nlvf)rc%sTqQtdm-p^=<>n|4N1b3l&a~#Et;1JcR(bt7S2pna&lI z42^2=9M=?PZvQ-SKxDQR+Q$k>s0vdNQV~Mf63Kvv$>>8`ZzQc{f=Wz}+xbQ$rGGuu zO(p4nkd>08&V*w7{Yactl>S@tAr9)Bk)-~OR9dFNrZ_RxJy@5qG4CTm=X3-%1Icha zgAvTogm`~so#Qh?Kg*1e1^+sVwNW0#mUZ|5NLldtf&2ZDmCyj#8TzJMorq=63s#uf zrKqKW0nu#e30=mh134mKmcU-v36E{CJD4#_TVY~tz!=RVSCko!#Bx%V2ZNMiW~T#e z4%i+mn34vglURm^(Sp|x)ZWDmQk)s2Zkw2*W50ZT6dnceF|4w|mvM=(abI=L&>i zlf03lf1BLCfDwYUi(uA@1H)wZa@)97XJh8Wi8Gpd*Slc6IuB#2($bH1WrhPg`U4k z!5>k;T#Ni;JU$+$!Y50Jo<5?0H9)^jPju>E#X?Ap;#iGA`;`RY3`xk_ zmH0&Q4+D>}3Cb=i<1-QYl)Jw}ZYv{{6s6!PY63n+VJ1sKf`TLkJ_8})Z4{Qb6l|l+ z9I)x!2E{jkJN9^oncLgI1(}~jzR;dQ?CmfagDD&gnRM_B-~h;oin#`u7xjXdcm&tb z`TxZ=bjTj`csQoSCMjiq&5R)lPMmaLotjQ>V5NB0hi8y&26_$ukC=rxJ}fyQh>`n@ zg`_;?OHRmT7-HE~MW`9G!sQ4QC^2luh=WTX7#WG!eZan!K$$pipK)w|aPU`7kmDH` zEJa{41XCi~fSYkUUQXH}wSm7GZYG*Zup_B*hm{5kKn{KqP{rshP3eNOi*B$hU_1O) zvC-QJwAn@Knm{~BCpy-Mr;R%0#lu`mxy8~0C28AEB1Z(Szc3DVKDeRy3^r!h06lQ1 zf|!-)*u$4Fzw_M~cNSqiZ5lWzNczBrfo^^KRMC?QJi^VypQF%%EO1!wxr*SkG=Z<7 zx;9%dQ(aS{@OUzbmXT}wJ^LUgqX%@z!C=?*#Q~=ku4hFO8-W-Aw#E$HjPg+#C(3p~ zim$t;BZ%rKk#t-q^|X4f3ry(nams5i3@dVYWQ1hhb0N`Ifp;I3J!q~~Ja7^TUQK;? zvAVzqF~L0|wy1`QL|BxnjSVe?Gs(W5lr;F1i$PHlLLGg_) zS$qpOBv6Zb?++HHdl4Ls>xR@}3GdUP;fy-6Z$gAKr0yNSs@*@07jV92K{s!fq&W!e zlY{j787dif8*;P@xQJ4pwP3>}f0~kimVyZiei6ZNT5XxVXj&b)H_%fb1$4%iv^zW6 z75Q)JJIY;#a&VzkJ zUEA8k2)6&7)MyJDaONtkH8OI~=<9CZ~7 zl-vj7%5yYFoo?)0s^NA;eXhn%lo&!J?jtE7k#7Y@7#@+R%1N+*%D7{x<;Sb*XQ+i= zqSQEDrJ}oBx;aC)3USejn&cZ?P7k?a=9UXZ+)*Vhp2|YKknf;8ZdVef(&Z2k1PR|@ z4>65NYJpew@6aO2B73O27i=L}-1mYx?8SGN3US*Y150(e5B1DYM(>Z-{}4X30L-x% zTIqr!-$tMvL z5?Qg$@4_CelIJfl5?Xsd`H(Xb?a_!-m;Xuq@D<+;6`vNf|$O9MWftUk`feZ6M z%wcj`E}r#abkvGtB*y|dj&l!B1Y%C2rBmX$K+N-qIW5ivV$Pz}3t}`7a}IgViwl96 z7tvNqycCGJ$T2SmVqQV1kQfWZT;l$W2VyRB%#}dQtBAR}3WgJ?X;NGZqSj5wkwVEKn3;vko5`GwYQqU7 zT;AgL^mpmmPL;O;B4PRO=?yL<7vMrN4Y$S+i~gWSj((R1xDGh8B_SwWCx##t4VRzb zS|ajC8Fp}j41s7;RO~JfX2|oTo99zVsp38RGojBb-S#$4t=-dqa-i95Yvt|G3v1;a zq7LY00*?BUVv%MM0@Ix3L`8$zYpRkf70mV5sdbvacC&F;~5I%?Tc?O>& z_&kfxW_*s~a}1y3_?*D!BtEC`c@CfFG1_qXG(KnW$>PFHC)^mneSZ*er}n1U(dkQg zJ_|S`*y(Iq*vu$|%P(+TOzK!sO?HK?Ao_v_QO3KC4Amq|6=Mw5q&14(qegs2U^XQw@P8Sl5722OC?;0m-rQ#P@`*PMBO1(d{EEX@Ueo05Or7EyyS?TqF3aZ(CHTcTwAG_r zqkm+Cry0j`cEs-%zv>nD64d}&m95`v4=mhV>wK1LYzQ>vDbX9B8=ZI!F5Hy0QJZKw zm%XY0Y_2-)udifS2P=JFe|J~)iI;)8@fKKq>KxhV92p6S0Oaoh=8efU?EsXwLtp)` zL;~vrXth-U-1ztt>crh8tY#K{2*_PzCn1`6WFLY;{=ky;1^yq?El;rDNR5z}MwT}< z`WYFB$9D}Jn641U6F!)9godqS<82RTittx-2Yx;23XTm&}8%21* zA)Q657Bax_P%?n8Q{JMWA1G0vgMEWqkFNIvw%BRB833MNkB5F!%Ie9bLXg|I-f}Po zW$?tk=PM>&*`P1PoHvoQDAn?$?ljUPBP~M~`6a@mjml%vL;f}i?4J2bW9uXD6Dop( zOXcKb1@j^$RXpr^oy)qh0YVQ4COj2t`J1Y#`4V`(+Pj~mvU^(Xsc7VTqNC+K4z)h<(!<8wBw>$V%P0<9k1ow^CJv0XG z;%SEY87I3g#mse(Hp|$-P{LuGwk7YUm0VA(IhwMdE%w}>%zyDSzx2&7J>S@+qTQ>X z_-qv$g@F{+_p^RsTFEw%$0l#KRX4o0onmibMW5-`n~FxY#Kl3;g_GX z7IUy}^J_B&>t=70po0L@T7Y2Q`%x?6b-Z7-YV8pOTfcWqX3fSeuiGsiz{fg2FOOus zzMw1Qe(zy{KCY7+_<<|@suceMk7A3aRzXkE;>N39H;P%}96*|BJmg^8CDyJ$-d8A) 
z1S`vKDv6{_`CSDsM?Wm&HPe1&RqiB$_}t@MR3*nuP)*i&le#z>03voE<>;i%1=kMR z1=sLe+g215spY9wdEGnpF|MjwOSd@9eEsaDu{9W80<2Or z_>(*_F-D9Uep)G)ALLcysbd3tMt-0z#XkY$``h9@MxW)*jUh-7-CsQO{BpTR7Id7X zv>*jEjqn}0OE9CRtYh(_cneG{0b!F8l^kQQV- zyhOrKk2Szguhp_CNIuaeRl?8Z6FwZ^pl6`_9y8zq{6E}7WMTRSe4At)HU~EkH@lkJ zXF|)+(XMDerQeE7D0wCggMfPn8=#&$;AnTgpyk7JwMsxco;pRUIKsl`zXc(^32A32 z2C;r)kbeX06nR#xn9t{0*&0u^p#4a8sId#9qO%rU>-*!w!2LDk&dLFTG3oUt$eY?k z>t^$qw@0;nTroO18vXQLjHgC6Ita5AvYlu|lgSgwWaIb*Gk(fXs7FCv5i?=Bns+QE zb;HId+PR#ow^AA){B|b^i+2Mi_<=FjpmIqk?K)}VH~2wfvvxDT_2I*A0S>NQW-6a? zlUr!LsZ~JTpeXPjyHqvvy{#nA&~dn5Ii5O({%kpWD;_u&%< zNniLz06Qeh^AFT1n6OJSCsKzeG|8HgQ|KVpJG`X97pA|))b^`1Bb%p}N^r&RapH=+ zxK;|vkr`V6k$JLG<*{`#S)#CsazL|5wyu&)P4ZnXN9wX+Ewq%UDNk!g8U3zPU4(u- z!E%(c9HQ4}C^$j^p+ufglHGI%4rxlsI4I9iq4NmX$0i{`gyuG%7`=Sy65I|?$fqeW z6_})pRA*wrFcwKg+_Fl_rvpcHQ~nETpHcfTJ^eQdjv{ch1xLFrFH^&W^!WtNm2m1O z@w~H#Pv93X8WTE5z-{l)_+LE8#<%}-x-gHu{Owo@Prf?UH!&tZ zQc5Aq9;*V+!T8oQdDcq!xxBYW*UR3xP$rlw`0Of#$@`Xjbqv0$*2oVv@TweM$yWa! zF6re7*8Vv8gLzsb4#LYD*XkDFfoh$8#^8ZUOS}HP;7JYM#Jb>LjC6h(7?8sZjO$9n zvlZ%q-!<|-HK_H5BN#b6eTB)-8u|80>F^3bzQW)sfJ$}2jEoG*;CW3aC`I&vFnd~~ z`dacS6UE~=#3Mv!{I31h#<|Z(gDM+fqpPYK1F?j z%6ATT#^%^+!bzwfbgk9oKSp+t3;umqI2Q7J>yn6|^(;426&aPQ^((OKt<%|PLWzyT za87Mx{ykfDQz8a7kebE%g&7L<6xUVIsjc*wkWPXaPDlV2*-I#U z1Z8^3PBEQ0lQ-C7c2TX}6l5sqp`e!nV(-{1r*fr9j2^ifB_|}x2`7I3JS5q`q??|| zJ_WXu{!aiqx;3l3qjnSu>E|-*iTa%x;0InD65!? ze0Qrl_%U6YcJxv`ADtt)oRfr-eN1^i1(uM=6SaSiI{kB0P2#NoL&A8fI#CL*%YQ%_ zmMPdlx#GYF*3XAwEr+`-zR8^YQOZMmIB)4r>=OTijeL=&F+$!{)lJ=RQ4Y32{USZF z)hcgIWL3%g1@GyM%>jFuZIqd5bK(>x9satXNB)pW_bb?IqQ6#{B0$SetA|!QRuI-b z%wS+9n!#$M_%uA)(aNOs6!|;UGvO5cD)RpaaJ9I@rJHIkt`2{X{P{&tk583&EvhoX zIB$h=ik|vX`ujubzxh!8|6A4Bif`4^dP1t+^_BG7tz2zSf7q&{txQ`#bv^af^eyNW z)&DnJW!$=S{p;wb?&r`#-*_kw`l0`6`VR8EPia&VG)~vQmA-`c57T=RYMh|O{|`>2 B#R~uc diff --git a/scnym/__pycache__/model.cpython-38.pyc b/scnym/__pycache__/model.cpython-38.pyc deleted file mode 100644 index 1d5407c75ad826037c78f84716cbdb1b2b7bf0e4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 14339 zcmd5@TZ|jmd7c}G!^=|CYAwrcV-FO^F;i(J*>#)>ZX#K>e5t%~B{@-PsN7l3aLE-p zL!TK+y9;hpR00y9b&y__+w)S^#Dat?5L-8{3a0cJ-=SW<|RfkGj{;DIb zt*Lac4fT=HHb$j(X=Jv|QMp~_e06BGEzjDmc-1>aKB1aScz~sNdAv3&<(C zCUVRqr-7WZYaz$-l#K%$P1HJwyoy^zUNxz;h@6^RM@~J-IpnIBl-9ys(5S4mjMzHo z42RCz(2K2iB2VyL=>vZd_;Km-WIXh`RI_-|IOz(%i^Nmfs;jg$S8eN_;Tf*x>bLbD zD?QaU+|sRjyX2W0W%|12ZH*~5+SZR1Lupq)Mv_WSyI#nT$Fqwc-|)OKswC|$c#iM~{<_@@g#FIQS@%%Z4c7fZgz<2L-*pBf zs`0jW%)T;k{PnGg-R|S}n0=}51Rqh^MbAHGzl%cq(nPj$#FfGI)2CiJ_0sammtX$Q zE8NveOOGwTGw{cgD5Sgoz;!)8HiZ{Wgx}S9mSjTrG;j19{vnb`iPWibQ`^<k+hDA@V`Z3k3;JHfk%ultXzLC|h@2S5qQ@`OFnD>(N zZBk9|w*u`KdBOG(p1pP@@V%UfmKxVVY^UcrSZ6Q1{HYq3LG#e*dMm91 zaVeaPJt0i2a%^?LAA_jV5oJ6Ii}ExdJaS>+gIQy1$iF8~#3NtI5gJ9NUZ)A>~S{LG1dF!D}pO&q^} zVl0AnELM0T3?`!MofvOL{lM>xo$fVk8Dw|;tx=MW0@oWZkGDhvq#wdJTtcE8YG|fv z>WgYqZOByJK8X31Gn)?jQ!Q3EMFsV{8nsF;+6X0l&*2-MK{8c$!05XQzUoxHgU=4w ze^;C8$sIiz^u*8DD6cA9qeOXNXLb8uv&EIX3<+?<5w1PP3=~%S@wjvo{1pYF+h0CE z44mjP_%IL|HxMI!$g6eCrdeN3nfj`fSXBNfjtZXi4U(y^W<<11L8~DSQt}ifPg8Q3 zk|Ri3hIoeVpF{B)wsu{T@=h*#IJOPKR{AMDfa;3RPq5j z@yGB?*K#Yj>x^7gw{~lxU3QzEwNXJiQn!9vcNg5otx~(n=v4#T9gJ(|K=kFUv3Ksm zdFLM?Qt8=_kYV%Y8}tT{X+05)Y;SW6VL<}TcAdycnDT5CiNV?=nh$S&XWjF?aF4j# zD=B52M99ev%Y5Q@cuI+y2X_%nB9gXDEvn7yB@w8zB0DD{DV{*i&2KNU;(>!m+9$R4 z6Tkx06~P#!qLUtN$!s5_`H*SjKoHE^V=(~yM*++H-av$rL@O!5u?AAfIBl;5!7%G+ z5_)!4mcU;QK6e4q%3Ts-|4d9p*ZIo=dC;NiaXDEh;6h=_rLRzLm{WItH_m_pWeN>iv49dlYG^YE7Y(bSl#_(&`lINT)WfAgxbp zNEf2U#sMfGl}trx>^AX?mK&uz+Pb<^$6ZbQbXrHvgGou<)jmcUWO<$PcMsfAP`XH} 
z@5kyo$@qG7DDCAY^|HXd{KJ&qx4)MK%AeBO0Wl+=Sh>r_Qrs_l1jK9~B*|E=hOFF^IwvJpp{u!aYPOq#tlHO*H>iC&%YR_Y zoCMm!18B#vPXd=r2j1Jcs+ex}hYoE<{^f0p;$ffIBif!=^iiW!%%H~^@keD37lBkG z7flappR<)S9Bex*kRj3^FvdCJZC=M*%}%7CO;T_WMuRS+P5#2nCLv1c+AAl~^@EL$ zS@=l=Y`z+^5_TSuK%nc;vuwy=TgqYEAqs)6g70l?|$HGnJ9X9d*Qm{58!~1P}Xjw{Fyx0 zCF97ac%k?dwyRjB#OQ8UdIR4X!s6)(RvR+~muIGrSA{=8qMTsEMTPM)`+>UK_|ni# z=#|hZRpY+#rDw8PeNr{PGG(&6XhT2Hm8E7;6^dH&*?cc4%28<2LdJsB1(-O@Ixzmd@VX~pl;T!HE0e&bOs;gcz#1-6Y;gv{- zy+qa$90stf_%70c$0|Dxv|SVK84Z}D!}Xy5y0T+T;jRH1me3N2Pc8jt3;kZFeg)FB zH=nFEi+ctR?pCHI+LWi7_;ct8W>4F(fVJfvYia?(Oy_UFvhRBWRs`^dM0jSMK}{3E zWWCSo8f&k)8TBhN*BFp;TdFwQ^LDl@XZmc8U3_6@NcfZ87j%VeMZY`f}Ri&KyZ9Gfq@*?gaG&aYhQ?VXSMsXQ?yzh*?xHJsb zPmRUzP*w6Jwko;JD$YEpT>k)H_Ort1Jq#OU z_VbxWl+XM8XY)sjrQFjr5;(>ej1gV*HFN2BI)iUW6ctTm>BhaK+*IMdHQ13$_omcF zio&A#0Rj*BQc~gr~_J06Ic@&BKIxtX`2n z{i_g$(#hniOA17j=6r%mnbMcbC_gwJjQ2_^3^K%a#kd^%hBsL6N34zx9XLL-KGGrE zGzCQK9+C^{!2h;@JD>O@bJ>tkxz}zcF;EylN@+@ZxR>UjIL*|UE}jZV3+Y}x3adHt zHlw3kl*Q8-*8IBg zGWP?Ql&f&;b;-lxX5A#D_}+~$+8TN`T$|*_vza% zJoxevO$i;bWryS>0Z*^KmG9`)d@{l1-3U%SKm#~=SeWP4*=bm{;l;wHz&<9x89{1i z8(blo4ks>Z^6m=ZKn=i#cHg<~QDgMdy=D*K=4oY35{JvbeXpg(rexW;oQw^Q zZyn6WT9bGmTf87hGx`5KRF9DKCFN@-Bz$40%gLn9c&9Z~q%VsN(u1KhK~#_~w1fa& zPlM3V9UGlWc+VwV&I6AEehA7Av*V+{Kar?z{t^}{V2iaadsWWHYRV~KiPdySFShb^ zB%1=-0sUmNN(|-Y75L$Rq zZWyef;8}VRSy#ZeV>KT~B)kVOf-tTiIyC^^BnC0=TOg8*iLlS=oN^^~r!lgY5<* z2ea(B&NzX?*{Gagt>pWlj1{w$Rw*vACLLE`19Rh0ZV$G|I3QCh439!p^wseVdq^JQ z9#qMx*C|CN%mg^hI5LS8ZZLDVaz|O$rUsi4=0!=Q0*_@-V z2Rj^+bs@3G3!Cqo$Aj$sq9O0$$^A|#h$nI00+KvRmd$!B;0~8wi64TEq`rHF?tY&V z*7V8SAe{^r-LaG5HQdEjIU{7lBf7=WtGp=-cjn&jQL8yN{uSEFw7ZpO*v8i^1SZY< zny%e5pxSF{lkDyV+F7~5t;ub{JA&m6wZ}CB0wf>Sf)$_4XO{5I z6Is*Q9=&SA3Pt29-#m#;InSP+5eNIM=&ZC=lB`Zh5|^Cuc(^6`tc?s5G7nH>R=k6x zUD~8|Z`TUrC;(Qb$NUK~Ljw(zLsf(Ay8A|{s&3brEs|A&3uzV%T8()?DV>I7po$BW zP`BbkN^98 zrhQ#xv0TrA@l@f=kD);xJFvaGV`9V$+)`B!^y3*(yQOsy*@He+cbW+1IQCKXGW0-R?)7;b4sk z-V+lc!_z@jf$C)TEDe|KUK_0h8&MeRGP}&qJHyahJ(djv9Y~%D)eM#LxbrJK(-u=V3m&c^U8TZBBLQ=AR{M1YTTF|f^-&>E|*?Zm6TW3?F{VI zxw@SW=dmT8XN}JAaqtE@p22}%go0645wA^=@6vfR3VZ4s6bLRyII$?(AT^&RBMDj_ za;u~X?wB*>g_0>wuW|4dJ#5qvT26bSQ*8Q1oi#ch)kQqqFTzoc#YtyG%k= zvwN7gcXouXxEV8lTBh4kUAj{L??d&}q^rjf3DoERL@Y>JQib?oF zV5*W+Ar}C%djg}I8|wT@TboG!D%(4g5s#-{XVo9(LwnDG@03Jg@vPBx9ODcG&l3)` z<}97Fd5FVYS^p8Dtk=l5R)o44Gsk`h!GXClH5YsW=TrO;eRY8ksmyb|G0vCf?@a`2 zTFer-=qrLS4eTKu=_PL~)Lz<#PXq*X_%|aX6|D&cu&U)D8fh$IyzqmK214*;_465F zpCs@;nS^`>d8%@zTgf9MEtT!?)>G0Hx(e}IFojrnlN({KQIy$39LaR(^qhPV}0@+oE z5feoi#lY`2<>><4 zU-M$bkgtI4z8hg`To`c3L+thwrTOS~v&ovz!uQz3L7eL%TJm&W!3C3mc6o z`M;hg5dNOq$;57| z@E6YiW~ft~gxn9XUoEyjJbBDMbv2(9-o9VyB^iCPEAp!+;)C(3Fh{wNowo$DV@U_` zq^8S5m}m9{uhSvXW0?k*$NN?U36cS#zsmN>vv1m~NtdhnA0q4lK-m%N(zu0Eu!%St z{_AntX4qmJ+kX4K2@cL;cKA@!e!Ytr#hGChsT1Jnd)FI~ts~8_KC&DPWGK#47!{K7dne_d>#P`r%<&=#zN^w>by%V!9U zD|C?NO@i*YmL-skYQ|odV^dh{0;ZL}fRRY3~#<7lm#ha92P;?0b z9|yMA@H}%Yf#K^K)zU}ddb_;&5|B3BmOh+r8-LMQ>yRcx5?^xE9?)Qq$`OK$W%Qyv zcNmyj)#>yQ&L5Q)@oV8=!eM-uG{dNBP0iG*YEwI+Hni<3z$JC`80_d-fn}(z;BO~$ zy*%X0FSvT%qepi25FE-g2t2Wm;p!nJ<}XMUAThxwi!SiW z%aM+$)O?Tse!z#+c$~47MrLCx4b5^uhh0xhL}5L$SWs-e2JXcl8NQ1V%EJP5K;(#O TX;ppk!s3sVcb8sW{K0<%x~rBg diff --git a/scnym/__pycache__/predict.cpython-38.pyc b/scnym/__pycache__/predict.cpython-38.pyc deleted file mode 100644 index 27176763a4e476d7e3259131952431bbaeb830a2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4950 zcmbtY%X8#L8P{tx8qN6q*mc4~Nr24+uNMwHDnKe>!-gt?S=dc#YhX*3r+da4Yoyt3 z&1UVAFLAD+a^pa#vN=H&e*$L?IdSgd1gE`oAwa_KYsvC>y`iW=Q);QZzgK_V-}n18 zZ_Um52Ci@XV 
zv<%Z2d%c#XuoHjW?zQpmutw4u&-Lc?yPM397kUeol*QiS8wP7K?>&QgeCgQkEi?0u z(QQ43ZezP^DpyV-AvxX}Z>4dTJ~lCdUi&6blDh{Je)G*+=yvYJ=`i6m`1+kbG0$F2 zbY8_RchGRXoSwzZp3Qvj@CJ8zb6{^6+{4dh7PCKWklF7wdM(xlg+FTZ&S-8l&lg6E znz8{(8g&O2Ykuf3kF`L(#4H%!QLWp8v$&sg5&ahSR=uDP>w+9M%U~d~alju<1eX%4 z3Rsj!!Bk=q!OqOecD8&!!1Y?5i+FdMbE&)6>q~dX8RJR#4v&X>xeQ(ol33Znv)4b&8vSds{T3%Q# z@8uv)&rL|D<6RD}!DlcIc?y?&7Ehd#E=^Nv(hIp{;!c`w{WxROgzp4oBaixff#f-< zJzI2~20L39yafl7leW7JHFse`HPcY*RGrzCDYtZkas|()BJDSDqRuXgD>`aF!Y#jv zjn9q3JT`N4-xBMEIW!9+v{JXw&%!~^E^KD*dt#6~%sMtrBX5k{!WiKQZs^E)-#T*h zX5pTAr^d0#+zsQXnYRkJXl|en2|nc6^dzp=jr;3IUfv#cPUZ^l)RZEhKQ`VqABY0S z9eO^3o@TAbq(lCM2IxJ4K9WCLVBUrS*^AI&(Q`<>rApIXXj&d%WVDFeXRQydqVX&1 zm*!EcXq~L+m3XCw({g32Cu^t12c`%xzj|tb&KND7tfP13)F7G%f6>wJM|Uj4s88@y z!l#TBL{BcFIOVL~VcPkp1rYW)xE+aT%n_7j5YbQ_N7rj@7ft~VC64~4j;PHbpH329 z6B2(NTrlPOnJrqvnMGScJV59P9^`vb8f58Z0Ti6|36;S_WDjG;*(Tm}7LiqA06R}5 zAMZY41P-`XSp_25Rh``@VoJ}{u~OFYrrTupzF@)2Q$#4w7u%mskb@Ln^ zc53qN!#l4@IUVaXI~&$JL5v&;KS($rh$Q@>yJXM>q&qJd8AsS<&WBQhBx|U~IruC`gteT7FqUGAY z*)hF(bRMJ2mg~5tZ#wk*#Idb^yNy+gsFtn6<=bb4z}A_P#|HMV>nQV{a?2=D{%(}_ z_d*#Ta?yeIKJDY7qEgk4OxrDVljVg9xW9MC-cRUhJrtXXQ5a?aX1G(3+OG@y``c}bSF!v#>q17 zl@dO3_hhwLV&a}&RQ}sGn#Z~enfbAo!YAxT$pDFxf06*6D%+Y$WMihl#tZx__#?bs0Xg)9>{6X)< zTH&&VeGj~U1;tv?8Q6D>a$i26qQPQ|`xf4e3$sgl>4qc@OB4PPS4O2@xG^24$hX)t7?qu#^nYr0lbG z3ppac3;4=JosCJ=&nZvK&EO3x0q@sSRe`C|w;m+=h_RT8)hIcuC6V&uw4Y3w&f5gB zlZqhUiv=KoK>HksNXU!VkSTVrZ3fpQ@P=ROo==}g;#06wPX-jG1T<60b)i{h5*P&l zu8sYfE)OqN8uk1P*9AY;1Y~ICo%>WxODbe5E+Cz*-D~0$uyv^-z94}r_{#zU%~*>e zw?w;f0vBs>=i6J8%m@wW^Gn{Rf@Oy5MVSrqarCI#9|`By%m9??3x>q#;;WvE`h7mh z2`hpM)wOpMK8*SYK|dP0XS>6vJ4N&2KaF|%aVGEbd=Lim1Z83PlZLAxecn%F# zmjhIlGv-mMT*lGc6LWe7AA7)58z(@Sw^Xw_4XWMGQWQlz#a)#rB1(r`5WaPe*H-ki7BLA!3vn^$q;hZ@0woM$Yj;e+bz*Rc+lZr4>qrMqiy zZ{uuji?70n?lP4uWNsKLKMZjW@zsm>P8dFzMoBf(3`3Un!%zSU46#Jb8XDAtlo6GU zoFZPL(Qngev&!QF6$TZvQYimCjh6?dJ(I=7+r&yW*i1wI67&U3RgC-!8qaiA@VAg@ z{L{jE;UUwqtbcTVijjX=)~fD%A8VfOig=CqwK)i%cygeP5m#wcpVXBSOz@>Yn5O+Q z&Eq(y%0KUmn;=&%3Sw0G<=Sb}%Tm>&GY+3lbR>xHY0_+W#QV9nVP-9QY5D+o-z`1- U3f1{?84c~Ihu<=u^z-ch09n{fWB>pF diff --git a/scnym/__pycache__/trainer.cpython-38.pyc b/scnym/__pycache__/trainer.cpython-38.pyc deleted file mode 100644 index 2ef74908e7ba05a640745a53390d3ea34a47551e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 27471 zcmcJ2dyre#dEdPkZ!8x3elJPzB@)zfNxfxSmPGMk%b_SmqHGtojK%J~y9*EtfERZu z61k{TQ+3iba+J1l9=21g)5+9n)9Li#$#mK@P13YY$7!a2blf}bG<7=8Xr@g&)3iy~ zmek+xJNI4yi={|g6YSu_dB4y3zVkiKzB)Xd*6?}k#Xnj7^zUfeAJWtNiNfgI1x5f%%TjPasq>Gr*>crM$VN%?&>eSYBVOrer>de+`VRmb-Fz1&yUzkTZ ziR!}EVqx)p^Cr7_w+<8zNZM5O;5E$}?i{jmJ9^>pM>R8T4!xn7L)MWyk-|~e-X{)(0#f16`?H6Qho7fRJ?X|-y3$>$x*7FX=m zTDe~HqF2gI=WPtDme;+ZS4vxrs`Wzo)~gM8=ho_5jcvy&S~bV6H*Oc}>&30|t#WN6 zZ+Jr&tZMbz?S^&X%4IKl@%+`R$T9rN_SRO(zWqtNjC%4BJB5O6POfL1zk~$(sWo`| zJ`?agi*NG_0G3QFCRHy)%9s{Tff+NSR|wNej`>GGl$H~8(Lx9igqSY-msZ{L%X9FCe0CZ6yZ~5 z6hk-WC9c_}at&kkvsirT96l1z%$0I4-M&`0*EVxobA$hEeU0zD(3aFvzfE48{1{uGIJ}->vj9}l5L(`US7_xhWK2r>GX7R+rjG2Nel~IwVF;ashpcy(Q4G!Hrcast=qBM_SULp=j!V@ zKf1RuYFM_)cgc&0^49iNZlH`+3#C}OX6Yt+lzZJOZ)^swNBhcFZOz)#jIGkG{folj zyy}mXchTCWwRXKx$D(R3 z=Pp|7rR}Pd`$+Dgvwo?F=OMvuTE+Bnk>}86ERP^YC4Bo~oR~Jx?yH`5`E4GvIJshS zrg*7v(7aT*oxP!+9qT27jp2<;FN@fywI){07WT833?|)6ch|F*4AzL3v~D$uH5>=t zaNh)a34a#6gg>}mDjX;{gFbjEXSFEfYbs|VZb3b3z#uk zCh!Dc)*S&Hafbj$-BBs?OU@Wvu{T-&xHD0igb#S=O+(jeaWe>=>P)*@CAFhBzu`=D zW|UjkkbbH&OP`sv6YQ6 z%xy0f>5Xr;>`tT$gx#nNX z!kx`T-Mx4ZVuy+ELh$%ecO!ZjeHX%qQTIpuFlBf)?la;aj0lp_15yE{_Z}!!yBJat zeM7fDwbXOIzAR*hx}&iM|8NZpAwk?-qgREt`lEm#*?=Qg>=@7G^+KXr-zb`8`x@@F 
zAcSy-F4nJmda=cb<+bKbPk+5Ndx`glrI)vBB;krzKtnFQfJ{qiW}e z?P(NlFA&TEcu7v|+O}CLj7rj?It`kjdkb0Q>h(|brH~uET^Q?0?WsEkvhsRq!? z$o03lVb`}CLLaNK@)EV843n3s6*)pOyiG6Ny)EY_ylB&^uB%az0g<7Rj!U1V_ni9f zAgSdbee53*)p_Z%zV5|0EndLCuGyrNcb_}6S>LkG+}{>!l3~!E3eKTRxOkWZBD&+g`F*pKLwyY7uSQo@z527sNbZX z(MOHAk<~MiMLnxW^~s2#^DnBW@i&gY48kS#|4xoEMQik$;9{ z`@zRSO~>!(@W;*goYuz8z)av5cZf>Nw%r7|ObAWNZ?dK#B(Ve5p_z1(xIuu9@1*H> zwGQHT^mfV_>SS>HNbMM(G1}?&5d6b#lKyrE?yNi18QIa?4B>Uq{%z3L72GG#nmv^%(eIJ|X?RAxG} zJ6dOM2jw8mvNLZE-O)dN>gr}@8Y6z)c+0RSr7ieHTZUa-+A(2faVOKw)Qq;ES~g*h;I0-8eWUcP8`I#n zq1}g2M$R0=&F5Sj_pHughxd=8cXV@nMuQtSl^M-vL9Ovx46|pgAmE)ZU%qe$_t>1}oT}a5ZJ065MC*aaL zCoyCCMeVf*uuAmygrv)xGZ?ekFGs&>xD%CAL5{bK8^0;>514cKO?4gwr*a-QHs0D6 z}lfya5M^}69Iq3#^IU+ z7R;@0gZV)&uHvRSn>h70HbYFw3l7QhF1EZT*wQ|V?P{L@xa!4({Pf}=gh7U0wsA?+ zS{X%|@(@3l+s$1qT~(J0u(;~mHPojrg>o7JYrbtF({6%z22{ZAi0va&xsdb*06*lI zVpdp+YYs|3zMgYIIDkb-x2KnO^`+g!k`$fZ9aav-%@7tytAQQ?>06vzK``ZS+UlIc7!OKXPvQ(e?K32trUmH{jT{m(M?cHFxQy z7cYEFaXj~FUjDQ~4jekcgo*gBZQj{O+B^eDmO1W!su z!aVgNRja0uce-SnAnQuilI^7|)WMZ!*~CEJFyl5sWC=F&qU^etZaPQ+B5cj^;#|fh zCqHRF%k+BL%k(Z3Z#3`)iv#24FC`2Qv1Nda5%7qz*rjdI|_1NM4XSV2ISSX7YN}{xfI9Ou0i4Tc!W9VGgb1&=wRq zVrJaTKB2?zFyqm0Aqs%NfeP=NEFeN3M@tf&i9Uor351StLEl8^lR)T_B1Gs(AoR2# zbTCIN(sm&CI9i|ek@^TwIFWBMMCuTd^&s^TQxI&3)TdC+v^gFk^%;kZ&p9A!x87lb;8D@Qs<;X7td2P4oqZqDqmr?aTp*dggQVq#?QyMNC4 zFx7&jdcf7p#XAPt@LA+Mz%bNs!Y4!+cCe?k6P1%}Ur$R8`Q`oKW0o5nvr1m-kJ_I? zTk@S#=3$d$+}xKn^Qb%aW$mjZ%IAEtC{Hp5Ec2TT!*|IiD5LhtyNtp6j9%@mU&?7S zIg1_m7TR!^QjV#fBYhUB8V*V61+?(^SA|%DFX+Xu(A`-!ACUR)wtLw;A>>>)M0(6U29oE@gJPhcY{f8x|p^p zeOca(UpfEWr7P*()P)yce)-acYv*$I$C>hn2(A#kKya1dMSxap39U`<4)MpQro60T z82CK#;@h~oX}i(o#`S7z5lnY-=!HEl$wC?4^78K7*|SBt<@Xi9jJp%ahz?4=KDe_VsS_*_oF))PkONmC+dIAvD5fSR*(wa{|CP^eZ)-8u*=XP^bm`955)L!t$eMvbGQy|o0 zX(J8tE(78!jW3{)QRJLY!p+}53HN?P-cVMbM~xYLNeB{7ziYf3O(eyieK#6SqGqym z7?PG022&*8pD-jLI7&h=mH`>rTXsCD{8BbZP(?(hL8dB#G5ua^EFc!cQR^cWc~&Ly zNwmD#+Z>=jYtR_4v~9;BM@;SPP7A!A~!M#J_0yF zQP%)b208Jh2J{T=A~!m#i90r3dd0xdG%~nqjdO3{8Ws(92rfO*R0g{^ZM24i^(>d4{4^P>)R-t1y>yY- z09qb!XNtue+oh@>lPDI=`dYE54q_h8UZ&hEmTs2HN{7#=)x7u%lG!#G9TUdBmwGYK zf zF?SWjWm5h9Ps519#dBc4KMDWi%Q2bjPR|>zW(?B58N{p}K-&3aTqqKZ%K=3U6XiDZ$b4l#?(OeM+SjbPU~ylukx9 zP1>H@qlXAdpb%AcsJWGL=#bx0%6>zpopd9JAZsEAD#^5(==&-tD@tmgCkw7ouL{>M z%ysNTCs(4F_uA?WKNR~EWSsSRfe0(?&0K&5-@Qp@jd!=^@|RHH!NKc$04 zOUIRl6^UZMoVB{OzFuDA^`WnIyh(r!r&RXJTaCKyl&HZ+jTH>eha>4vUA(2fCCwS@j$AhZZb;N9 zlS_b_7k{qR`T~Km-6QGkZss4f*}`iqr(0T98wA+{0q8%li;@qNM$P|{@ZZn6AG{Hmx%FRoJ`=2=GS#ERV=RI zI@1IqyTCF8g%IbwOTZnWVV1Oi z5ej_}BX&|d8eLBjLY(OP6Yvj(@o+r_+Xa=Qn1(=6(!!;yR8UIPDi9|6gd4?#C=YR| zHzS{kv@`ADcGk&uM%=8K7QW<2dlddrRSv`2WAH;+Q8ymRPl3Nk!R)*{-+9nk;>>}Edk{R_C7*|@ z+~nb^a>>~|G{7H3Ec&b8_&5a2r%_6r0(lY4E`TH5AF}Ai_JkaOIJZB|LDlXyg~!XX zV&nF&(G6}}&ZQdcw9ji~$YGc1dP9!53wuJ2x@uiU2tu7F#s8to zxz30C1k&VJFLxd{mocYjL?C^%@>G!Pt=J9nTUj%ICo3NbV*F6mGlou9o?ei?g){LC z_mkACRi5pf_iMc-V(??`F$%TY$MKuS?*mBpK-jV+q&d+JTK2Ge92fgX+!NBisN3JZ z-$fgL*?A5v7&adTpByU#XVV4qG4t^^*dCl@7qRxRn@>vk69eISCtJB_KG`1*6QmEl zX|zxI{X8e<>&YEme)CkIKuqs=e;6^3xu*tZ<|+52o8OcAaq}ZCOP2gjcjx{3J$D{> z{n&9&s~vZbT?+XD(mr!vX%{8!v-g#@dm^*^^8>q|Cs@)xg%j+#&Lyx_7JOxdgXRVC zFNXe$;t$JH;TRXS>k{@Z%D5zH4up9wclR4vCnEbeXns^u9Sl=_O#G02`0>y8o^_%I z@$u3VAV@;OG~mh0ja|s{yWgvzk-k+v-E-Fx`c)Z(k?Olz5A4C{6J5iwo|6ydp?yw< zL`dCQx5VFEf{uXL-3h%7CP>YMV7>QFr|My${G5C^EUHTh4;J=5g!{=y^0}bnfgoSW zB&a#O-N1!~#zFnIU#xmS=c6lGxD4w*;pJp~>bf(3~jcR4LC4i&U>sRHS zLp;dgzVyl;#%Dw)KrRXpevsQ?KTkk0NNXZc96%i+&Bu+Q8;C(348%nz2)wHl#eRu3 zyiD*4t5NDGvbKD;%wKExl~=Bud9~L3q(~oHL!{%+paR^aR2=%^$E7a8P|Z98QAa)k zy-4VL@Zwn*+Njo7A&d|`jfhj)C`06_C2Bx`AW-wduC*;OYPb2tF4` 
[base85-encoded binary patch data for the preceding deleted scnym/__pycache__/*.pyc file omitted]
diff --git a/scnym/__pycache__/utils.cpython-38.pyc b/scnym/__pycache__/utils.cpython-38.pyc
deleted file mode 100644
index b85fbc4de694a8a00900293fdfebb8b861ef9bbe..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 18831
[base85-encoded binary patch data for the deleted scnym/__pycache__/utils.cpython-38.pyc omitted]