Skip to content

Commit

Permalink
🚧 change imports in all notebooks (scripts)
Browse files (browse the repository at this point in the history)
  • Loading branch information
Henry committed Jul 2, 2024
1 parent 9d6858b commit 563655e
Show file tree
Hide file tree
Showing 71 changed files with 957 additions and 1,023 deletions.
86 changes: 43 additions & 43 deletions project/00_5_training_data_exploration.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.15.0
# jupytext_version: 1.16.2
# kernelspec:
# display_name: vaep
# language: python
Expand Down Expand Up @@ -37,14 +37,14 @@
import pandas as pd
import seaborn as sns

import vaep
import vaep.data_handling
from vaep import plotting
from vaep.analyzers import analyzers
from vaep.pandas import missing_data
from vaep.utils import create_random_df
import pimmslearn
import pimmslearn.data_handling
from pimmslearn import plotting
from pimmslearn.analyzers import analyzers
from pimmslearn.pandas import missing_data
from pimmslearn.utils import create_random_df

logger = vaep.logging.setup_nb_logger()
logger = pimmslearn.logging.setup_nb_logger()
logging.getLogger('fontTools').setLevel(logging.WARNING)

matplotlib.rcParams.update({'font.size': 6,
Expand All @@ -58,7 +58,7 @@ def get_clustermap(data,
**kwargs):
from sklearn.impute import SimpleImputer

from vaep.pandas import _add_indices
from pimmslearn.pandas import _add_indices
X = SimpleImputer().fit_transform(data)
X = _add_indices(X, data)
cg = sns.clustermap(X,
Expand Down Expand Up @@ -226,27 +226,27 @@ def get_dynamic_range(min_max):
min_samples_per_feat=min_samples_per_feat)
fname = FIGUREFOLDER / 'dist_all_lineplot_w_cutoffs.pdf'
files_out[fname.name] = fname
vaep.savefig(fig, name=fname)
pimmslearn.savefig(fig, name=fname)

# %%
fig = plotting.data.plot_missing_dist_highdim(data)
fname = FIGUREFOLDER / 'dist_all_lineplot_wo_cutoffs.pdf'
files_out[fname.name] = fname
vaep.savefig(fig, name=fname)
pimmslearn.savefig(fig, name=fname)

# %%
fig = plotting.data.plot_missing_pattern_histogram(data,
min_feat_per_sample=min_feat_per_sample,
min_samples_per_feat=min_samples_per_feat)
fname = FIGUREFOLDER / 'dist_all_histogram_w_cutoffs.pdf'
files_out[fname.name] = fname
vaep.savefig(fig, name=fname)
pimmslearn.savefig(fig, name=fname)

# %%
fig = plotting.data.plot_missing_pattern_histogram(data)
fname = FIGUREFOLDER / 'dist_all_histogram_wo_cutoffs.pdf'
files_out[fname.name] = fname
vaep.savefig(fig, name=fname)
pimmslearn.savefig(fig, name=fname)

# %% [markdown]
# ### Boxplots
Expand All @@ -255,7 +255,7 @@ def get_dynamic_range(min_max):
fig = plotting.data.plot_missing_dist_boxplots(data)
fname = FIGUREFOLDER / 'dist_all_boxplots.pdf'
files_out[fname.name] = fname
vaep.savefig(fig, name=fname)
pimmslearn.savefig(fig, name=fname)

# %% [markdown]
# ### Violinplots
Expand All @@ -265,7 +265,7 @@ def get_dynamic_range(min_max):
data, min_feat_per_sample, min_samples_per_feat)
fname = FIGUREFOLDER / 'dist_all_violin_plot.pdf'
files_out[fname.name] = fname
vaep.savefig(fig, name=fname)
pimmslearn.savefig(fig, name=fname)

# %% [markdown]
# ## Feature medians over prop. of missing of feature
Expand All @@ -274,14 +274,14 @@ def get_dynamic_range(min_max):
data=data, type='scatter', s=1)
fname = FIGUREFOLDER / 'intensity_median_vs_prop_missing_scatter'
files_out[fname.stem] = fname
vaep.savefig(ax.get_figure(), fname)
pimmslearn.savefig(ax.get_figure(), fname)

# %%
ax = plotting.data.plot_feat_median_over_prop_missing(
data=data, type='boxplot', s=.8)
fname = FIGUREFOLDER / 'intensity_median_vs_prop_missing_boxplot'
files_out[fname.stem] = fname
vaep.savefig(ax.get_figure(), fname)
pimmslearn.savefig(ax.get_figure(), fname)


# %% [markdown]
Expand All @@ -305,7 +305,7 @@ def get_dynamic_range(min_max):
fig.suptitle(f'Histogram of correlations based on {FEATURES_CUTOFF_TEXT}')
fname = FIGUREFOLDER / 'corr_histogram_feat.pdf'
files_out[fname.name] = fname
vaep.savefig(fig, name=fname)
pimmslearn.savefig(fig, name=fname)


# %% [markdown]
Expand All @@ -318,7 +318,7 @@ def get_dynamic_range(min_max):
ax.set_title(f'Histogram of coefficient of variation (CV) of {FEATURES_CUTOFF_TEXT}')
fname = FIGUREFOLDER / 'CV_histogram_features.pdf'
files_out[fname.name] = fname
vaep.savefig(ax.get_figure(), name=fname)
pimmslearn.savefig(ax.get_figure(), name=fname)

# %% [markdown]
# ## Clustermap and heatmaps of missing values
Expand All @@ -327,7 +327,7 @@ def get_dynamic_range(min_max):
# needs to deal with duplicates
# notna = data.notna().T.drop_duplicates().T
# get index and column names
vaep.plotting.make_large_descriptors(5)
pimmslearn.plotting.make_large_descriptors(5)

cg = sns.clustermap(selected.notna(),
cbar_pos=None,
Expand All @@ -345,10 +345,10 @@ def get_dynamic_range(min_max):
cg.figure.tight_layout()
fname = FIGUREFOLDER / 'clustermap_present_absent_pattern.png'
files_out[fname.name] = fname
vaep.savefig(cg.figure,
name=fname,
pdf=False,
dpi=600)
pimmslearn.savefig(cg.figure,
name=fname,
pdf=False,
dpi=600)

# %% [markdown]
# based on cluster, plot heatmaps of features and samples
Expand All @@ -358,7 +358,7 @@ def get_dynamic_range(min_max):
cg.dendrogram_col.reordered_ind)) == selected.shape

# %%
vaep.plotting.make_large_descriptors(5)
pimmslearn.plotting.make_large_descriptors(5)
fig, ax = plt.subplots(figsize=(7.5, 3.5))
ax = sns.heatmap(
selected.iloc[cg.dendrogram_row.reordered_ind,
Expand All @@ -370,8 +370,8 @@ def get_dynamic_range(min_max):
)
ax.set_title(f'Heatmap of intensities clustered by missing pattern of {FEATURES_CUTOFF_TEXT}',
fontsize=8)
vaep.plotting.only_every_x_ticks(ax, x=2)
vaep.plotting.use_first_n_chars_in_labels(ax, x=SAMPLE_FIRST_N_CHARS)
pimmslearn.plotting.only_every_x_ticks(ax, x=2)
pimmslearn.plotting.use_first_n_chars_in_labels(ax, x=SAMPLE_FIRST_N_CHARS)
if PG_SEPARATOR is not None:
_new_labels = [_l.get_text().split(PG_SEPARATOR)[0]
for _l in ax.get_xticklabels()]
Expand All @@ -381,7 +381,7 @@ def get_dynamic_range(min_max):
ax.set_yticks([])
fname = FIGUREFOLDER / 'heatmap_intensities_ordered_by_missing_pattern.png'
files_out[fname.name] = fname
vaep.savefig(fig, name=fname, pdf=False, dpi=600)
pimmslearn.savefig(fig, name=fname, pdf=False, dpi=600)
# ax.get_figure().savefig(fname, dpi=300)

# %% [markdown]
Expand All @@ -400,8 +400,8 @@ def get_dynamic_range(min_max):
)
ax.set_title(f'Heatmap of feature correlation of {FEATURES_CUTOFF_TEXT}',
fontsize=8)
_ = vaep.plotting.only_every_x_ticks(ax, x=2)
_ = vaep.plotting.use_first_n_chars_in_labels(ax, x=SAMPLE_FIRST_N_CHARS)
_ = pimmslearn.plotting.only_every_x_ticks(ax, x=2)
_ = pimmslearn.plotting.use_first_n_chars_in_labels(ax, x=SAMPLE_FIRST_N_CHARS)
if PG_SEPARATOR is not None:
_new_labels = [_l.get_text().split(PG_SEPARATOR)[0]
for _l in ax.get_xticklabels()]
Expand All @@ -411,7 +411,7 @@ def get_dynamic_range(min_max):
ax.set_yticks([])
fname = FIGUREFOLDER / 'heatmap_feature_correlation.png'
files_out[fname.name] = fname
vaep.savefig(fig, name=fname, pdf=False, dpi=600)
pimmslearn.savefig(fig, name=fname, pdf=False, dpi=600)

# %%
lower_corr = analyzers.corr_lower_triangle(
Expand All @@ -427,18 +427,18 @@ def get_dynamic_range(min_max):
cbar_kws={'shrink': 0.75},
square=True,
)
_ = vaep.plotting.only_every_x_ticks(ax, x=2)
_ = vaep.plotting.use_first_n_chars_in_labels(ax, x=SAMPLE_FIRST_N_CHARS)
_ = pimmslearn.plotting.only_every_x_ticks(ax, x=2)
_ = pimmslearn.plotting.use_first_n_chars_in_labels(ax, x=SAMPLE_FIRST_N_CHARS)
if NO_TICK_LABELS_ON_HEATMAP:
ax.set_xticks([])
ax.set_yticks([])
ax.set_title(f'Heatmap of sample correlation based on {FEATURES_CUTOFF_TEXT}', fontsize=7)
fname = FIGUREFOLDER / 'heatmap_sample_correlation.png'
files_out[fname.name] = fname
vaep.savefig(fig, name=fname, pdf=False, dpi=600)
pimmslearn.savefig(fig, name=fname, pdf=False, dpi=600)

# %%
vaep.plotting.make_large_descriptors(6)
pimmslearn.plotting.make_large_descriptors(6)
kwargs = dict()
if NO_TICK_LABELS_ON_HEATMAP:
kwargs['xticklabels'] = False
Expand All @@ -449,15 +449,15 @@ def get_dynamic_range(min_max):
_new_labels = [_l.get_text().split(PG_SEPARATOR)[0]
for _l in ax.get_xticklabels()]
_ = ax.set_xticklabels(_new_labels)
_ = vaep.plotting.only_every_x_ticks(ax, x=2, axis=0)
_ = vaep.plotting.use_first_n_chars_in_labels(ax, x=SAMPLE_FIRST_N_CHARS)
_ = pimmslearn.plotting.only_every_x_ticks(ax, x=2, axis=0)
_ = pimmslearn.plotting.use_first_n_chars_in_labels(ax, x=SAMPLE_FIRST_N_CHARS)
# ax.set_title(f'Clustermap of intensities based on {FEATURES_CUTOFF_TEXT}', fontsize=7)
# cg.fig.tight_layout() # tight_layout makes the cbar a bit ugly
cg.fig.suptitle(f'Clustermap of intensities based on {FEATURES_CUTOFF_TEXT}', fontsize=7)
fname = FIGUREFOLDER / 'clustermap_intensities_normalized.png'
files_out[fname.name] = fname
cg.fig.savefig(fname, dpi=300) # avoid tight_layout
# vaep.savefig(cg.fig,
# pimmslearn.savefig(cg.fig,
# name=fname,
# pdf=False)

Expand All @@ -469,17 +469,17 @@ def get_dynamic_range(min_max):
COL_NO_MISSING, COL_NO_IDENTIFIED = f'no_missing_{TYPE}', f'no_identified_{TYPE}'
COL_PROP_SAMPLES = 'prop_samples'

sample_stats = vaep.data_handling.compute_stats_missing(
sample_stats = pimmslearn.data_handling.compute_stats_missing(
data.notna(), COL_NO_MISSING, COL_NO_IDENTIFIED)
sample_stats

# %%
vaep.plotting.make_large_descriptors(8)
pimmslearn.plotting.make_large_descriptors(8)
fig_ident = sns.relplot(
x='SampleID_int', y=COL_NO_IDENTIFIED, data=sample_stats)
fig_ident.set_axis_labels('Sample ID', f'Frequency of identified {TYPE}')
fig_ident.fig.suptitle(f'Frequency of identified {TYPE} by sample id', y=1.03)
vaep.savefig(fig_ident, f'identified_{TYPE}_by_sample', folder=FIGUREFOLDER)
pimmslearn.savefig(fig_ident, f'identified_{TYPE}_by_sample', folder=FIGUREFOLDER)

fig_ident_dist = sns.relplot(
x=COL_PROP_SAMPLES, y=COL_NO_IDENTIFIED, data=sample_stats)
Expand All @@ -489,7 +489,7 @@ def get_dynamic_range(min_max):
f'Frequency of identified {TYPE} groups by sample id', y=1.03)
fname = FIGUREFOLDER / f'identified_{TYPE}_ordered.pdf'
files_out[fname.name] = fname
vaep.savefig(fig_ident_dist, fname)
pimmslearn.savefig(fig_ident_dist, fname)

# %%
COL_NO_MISSING_PROP = COL_NO_MISSING + '_PROP'
Expand All @@ -505,7 +505,7 @@ def get_dynamic_range(min_max):

fname = FIGUREFOLDER / 'proportion_feat_missing.pdf'
files_out[fname.name] = fname
vaep.savefig(g, fname)
pimmslearn.savefig(g, fname)

# %% [markdown]
# ### Reference table intensities (log2)
Expand Down
18 changes: 9 additions & 9 deletions project/00_6_0_permute_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@
"from typing import Union, List\n",
"\n",
"import numpy as np\n",
"import vaep\n",
"import vaep.analyzers.analyzers\n",
"from vaep.utils import create_random_df\n",
"import pimmslearn\n",
"import pimmslearn.analyzers.analyzers\n",
"from pimmslearn.utils import create_random_df\n",
"\n",
"logger = vaep.logging.setup_nb_logger()\n",
"logger = pimmslearn.logging.setup_nb_logger()\n",
"logger.info(\"Split data and make diagnostic plots\")"
]
},
Expand Down Expand Up @@ -92,7 +92,7 @@
"metadata": {},
"outputs": [],
"source": [
"args = vaep.nb.get_params(args, globals=globals())\n",
"args = pimmslearn.nb.get_params(args, globals=globals())\n",
"args"
]
},
Expand All @@ -105,7 +105,7 @@
},
"outputs": [],
"source": [
"args = vaep.nb.Config().from_dict(args)\n",
"args = pimmslearn.nb.Config().from_dict(args)\n",
"args"
]
},
Expand Down Expand Up @@ -166,7 +166,7 @@
"outputs": [],
"source": [
"constructor = getattr(\n",
" vaep.analyzers.analyzers.AnalyzePeptides,\n",
" pimmslearn.analyzers.analyzers.AnalyzePeptides,\n",
" FILE_FORMAT_TO_CONSTRUCTOR_IN[FILE_EXT]) # AnalyzePeptides.from_csv\n",
"analysis = constructor(fname=args.FN_INTENSITIES,\n",
" index_col=args.index_col,\n",
Expand Down Expand Up @@ -214,7 +214,7 @@
"\n",
"method = getattr(df, FILE_FORMAT_TO_CONSTRUCTOR.get(FILE_EXT))\n",
"\n",
"fname = vaep.utils.append_to_filepath(args.FN_INTENSITIES, 'permuted')\n",
"fname = pimmslearn.utils.append_to_filepath(args.FN_INTENSITIES, 'permuted')\n",
"method(fname)"
]
},
Expand All @@ -226,7 +226,7 @@
"outputs": [],
"source": [
"constructor = getattr(\n",
" vaep.analyzers.analyzers.AnalyzePeptides,\n",
" pimmslearn.analyzers.analyzers.AnalyzePeptides,\n",
" FILE_FORMAT_TO_CONSTRUCTOR_IN[FILE_EXT]) # AnalyzePeptides.from_csv\n",
"analysis = constructor(fname=args.FN_INTENSITIES,\n",
" index_col=args.index_col,\n",
Expand Down
18 changes: 9 additions & 9 deletions project/00_6_0_permute_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
from typing import Union, List

import numpy as np
import vaep
import vaep.analyzers.analyzers
from vaep.utils import create_random_df
import pimmslearn
import pimmslearn.analyzers.analyzers
from pimmslearn.utils import create_random_df

logger = vaep.logging.setup_nb_logger()
logger = pimmslearn.logging.setup_nb_logger()
logger.info("Split data and make diagnostic plots")

# %%
Expand All @@ -38,11 +38,11 @@
file_format: str = 'pkl'

# %%
args = vaep.nb.get_params(args, globals=globals())
args = pimmslearn.nb.get_params(args, globals=globals())
args

# %%
args = vaep.nb.Config().from_dict(args)
args = pimmslearn.nb.Config().from_dict(args)
args


Expand Down Expand Up @@ -71,7 +71,7 @@

# %%
constructor = getattr(
vaep.analyzers.analyzers.AnalyzePeptides,
pimmslearn.analyzers.analyzers.AnalyzePeptides,
FILE_FORMAT_TO_CONSTRUCTOR_IN[FILE_EXT]) # AnalyzePeptides.from_csv
analysis = constructor(fname=args.FN_INTENSITIES,
index_col=args.index_col,
Expand All @@ -94,11 +94,11 @@

method = getattr(df, FILE_FORMAT_TO_CONSTRUCTOR.get(FILE_EXT))

fname = vaep.utils.append_to_filepath(args.FN_INTENSITIES, 'permuted')
fname = pimmslearn.utils.append_to_filepath(args.FN_INTENSITIES, 'permuted')
method(fname)
# %%
constructor = getattr(
vaep.analyzers.analyzers.AnalyzePeptides,
pimmslearn.analyzers.analyzers.AnalyzePeptides,
FILE_FORMAT_TO_CONSTRUCTOR_IN[FILE_EXT]) # AnalyzePeptides.from_csv
analysis = constructor(fname=args.FN_INTENSITIES,
index_col=args.index_col,
Expand Down
Loading

0 comments on commit 563655e

Please sign in to comment.