From d92a6085e4d246f75e0295b24b200b1f75ed8b3b Mon Sep 17 00:00:00 2001 From: Alma Andersson Date: Thu, 24 Aug 2023 14:09:30 -0700 Subject: [PATCH 1/4] cosine and plot --- tangram/mapping_utils.py | 5 +- tangram/plot_utils.py | 210 +++++++++++++++++++++++---------------- tangram/utils.py | 11 ++ 3 files changed, 139 insertions(+), 87 deletions(-) diff --git a/tangram/mapping_utils.py b/tangram/mapping_utils.py index bb39494..237aad7 100644 --- a/tangram/mapping_utils.py +++ b/tangram/mapping_utils.py @@ -355,10 +355,7 @@ def map_cells_to_space( # Annotate cosine similarity of each training gene G_predicted = adata_map.X.T @ S - cos_sims = [] - for v1, v2 in zip(G.T, G_predicted.T): - norm_sq = np.linalg.norm(v1) * np.linalg.norm(v2) - cos_sims.append((v1 @ v2) / norm_sq) + cos_sims = ut.mat_cosine_similarity(G,G_predicted) df_cs = pd.DataFrame(cos_sims, training_genes, columns=["train_score"]) df_cs = df_cs.sort_values(by="train_score", ascending=False) diff --git a/tangram/plot_utils.py b/tangram/plot_utils.py index 288022e..07d6ae0 100644 --- a/tangram/plot_utils.py +++ b/tangram/plot_utils.py @@ -1,22 +1,22 @@ """ This module includes plotting utility functions. """ -import numpy as np -import matplotlib.pyplot as plt import logging -import seaborn as sns -from scipy.stats import entropy +from collections.abc import Collection + +import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd import scanpy as sc +import seaborn as sns +from matplotlib.gridspec import GridSpec from scipy.sparse.csc import csc_matrix from scipy.sparse.csr import csr_matrix +from scipy.stats import entropy -from . import utils as ut from . import mapping_utils as mu - -import pandas as pd -import logging -import matplotlib as mpl -from matplotlib.gridspec import GridSpec +from . import utils as ut def q_value(data, perc): @@ -104,7 +104,7 @@ def plot_gene_sparsity( adata_1 (AnnData): Input data adata_2 (AnnData): Input data xlabel (str): Optional. For setting the xlabel in the plot. Default is 'adata_1'. - ylabel (str): Optional. For setting the ylabel in the plot. Default is 'adata_2'. + ylabel (str): Optional. For setting the ylabel in the plot. Default is 'adata_2'. genes (list): Optional. List of genes to use. If `None`, all genes are used. s (float): Optional. Controls the size of marker. Default is 1. @@ -138,7 +138,7 @@ def ordered_predictions(xs, ys, preds, reverse=False): ys (Pandas series): Sequence of y coordinates (floats). preds (Pandas series): Sequence of spatial prediction. reverse (bool): Optional. False will sort ascending, True will sort descending. Default is False. - + Returns: Returns the ordered xs, ys, preds. """ @@ -173,41 +173,59 @@ def construct_obs_plot(df_plot, adata, perc=0, suffix=None): def plot_cell_annotation_sc( - adata_sp, - annotation_list, - x="x", - y="y", - spot_size=None, - scale_factor=None, + adata_sp, + annotation_list, + spatial_key: str | None = "spatial", + y="y", + x="x", + spot_size=None, + scale_factor=None, perc=0, alpha_img=1.0, bw=False, - ax=None + ax=None, ): - # remove previous df_plot in obs adata_sp.obs.drop(annotation_list, inplace=True, errors="ignore", axis=1) # construct df_plot df = adata_sp.obsm["tangram_ct_pred"][annotation_list] construct_obs_plot(df, adata_sp, perc=perc) - - #non visium data - if 'spatial' not in adata_sp.obsm.keys(): - #add spatial coordinates to obsm of spatial data - coords = [[x,y] for x,y in zip(adata_sp.obs[x].values,adata_sp.obs[y].values)] - adata_sp.obsm['spatial'] = np.array(coords) - - if 'spatial' not in adata_sp.uns.keys() and spot_size == None and scale_factor == None: - raise ValueError("Spot Size and Scale Factor cannot be None when ad_sp.uns['spatial'] does not exist") - - #REVIEW - if 'spatial' in adata_sp.uns.keys() and spot_size != None and scale_factor != None: - raise ValueError("Spot Size and Scale Factor should be None when ad_sp.uns['spatial'] exists") - + + # non visium data + if spatial_key not in adata_sp.obsm.keys(): + # add spatial coordinates to obsm of spatial data + coords = [ + [x, y] for x, y in zip(adata_sp.obs[x].values, adata_sp.obs[y].values) + ] + adata_sp.obsm["spatial"] = np.array(coords) + + if ( + "spatial" not in adata_sp.uns.keys() + and spot_size == None + and scale_factor == None + ): + raise ValueError( + "Spot Size and Scale Factor cannot be None when ad_sp.uns['spatial'] does not exist" + ) + + # REVIEW + if "spatial" in adata_sp.uns.keys() and spot_size != None and scale_factor != None: + raise ValueError( + "Spot Size and Scale Factor should be None when ad_sp.uns['spatial'] exists" + ) + sc.pl.spatial( - adata_sp, color=annotation_list, cmap="viridis", show=False, frameon=False, spot_size=spot_size, - scale_factor=scale_factor, alpha_img=alpha_img, bw=bw, ax=ax + adata_sp, + color=annotation_list, + cmap="viridis", + show=False, + frameon=False, + spot_size=spot_size, + scale_factor=scale_factor, + alpha_img=alpha_img, + bw=bw, + ax=ax, ) adata_sp.obs.drop(annotation_list, inplace=True, errors="ignore", axis=1) @@ -268,7 +286,10 @@ def plot_cell_annotation( fig.subplots_adjust(top=0.5) cmap = plt.get_cmap(cmap) - norm = mpl.colors.Normalize(vmin=0, vmax=1,) + norm = mpl.colors.Normalize( + vmin=0, + vmax=1, + ) cb1 = mpl.colorbar.ColorbarBase( ax, cmap=cmap, norm=norm, orientation="horizontal", label="Probability" @@ -314,20 +335,24 @@ def plot_cell_annotation( def plot_genes_sc( - genes, - adata_measured, + genes, + adata_measured, adata_predicted, x="x", - y = "y", - spot_size=None, - scale_factor=None, - cmap="inferno", + y="y", + spot_size=None, + scale_factor=None, + cmap="inferno", perc=0, alpha_img=1.0, bw=False, - return_figure=False + return_figure=False, ): + if not isinstance(genes,Collection): + genes = [genes] + + # remove df_plot in obs adata_measured.obs.drop( ["{} (measured)".format(gene) for gene in genes], @@ -374,7 +399,9 @@ def plot_genes_sc( data.append(np.array(adata_measured[:, gene].X).flatten()) df = pd.DataFrame( - data=np.array(data).T, columns=genes, index=adata_measured.obs.index, + data=np.array(data).T, + columns=genes, + index=adata_measured.obs.index, ) construct_obs_plot(df, adata_measured, suffix="measured") @@ -387,18 +414,30 @@ def plot_genes_sc( fig = plt.figure(figsize=(7, len(genes) * 3.5)) gs = GridSpec(len(genes), 2, figure=fig) - - #non visium data - if 'spatial' not in adata_measured.obsm.keys(): - #add spatial coordinates to obsm of spatial data - coords = [[x,y] for x,y in zip(adata_measured.obs[x].values,adata_measured.obs[y].values)] - adata_measured.obsm['spatial'] = np.array(coords) - coords = [[x,y] for x,y in zip(adata_predicted.obs[x].values,adata_predicted.obs[y].values)] - adata_predicted.obsm['spatial'] = np.array(coords) - - if ("spatial" not in adata_measured.uns.keys()) and (spot_size==None and scale_factor==None): - raise ValueError("Spot Size and Scale Factor cannot be None when ad_sp.uns['spatial'] does not exist") - + + # non visium data + if "spatial" not in adata_measured.obsm.keys(): + # add spatial coordinates to obsm of spatial data + coords = [ + [x, y] + for x, y in zip(adata_measured.obs[x].values, adata_measured.obs[y].values) + ] + adata_measured.obsm["spatial"] = np.array(coords) + coords = [ + [x, y] + for x, y in zip( + adata_predicted.obs[x].values, adata_predicted.obs[y].values + ) + ] + adata_predicted.obsm["spatial"] = np.array(coords) + + if ("spatial" not in adata_measured.uns.keys()) and ( + spot_size == None and scale_factor == None + ): + raise ValueError( + "Spot Size and Scale Factor cannot be None when ad_sp.uns['spatial'] does not exist" + ) + for ix, gene in enumerate(genes): ax_m = fig.add_subplot(gs[ix, 0]) sc.pl.spatial( @@ -411,7 +450,7 @@ def plot_genes_sc( show=False, cmap=cmap, alpha_img=alpha_img, - bw=bw + bw=bw, ) ax_p = fig.add_subplot(gs[ix, 1]) sc.pl.spatial( @@ -424,9 +463,9 @@ def plot_genes_sc( show=False, cmap=cmap, alpha_img=alpha_img, - bw=bw + bw=bw, ) - + # sc.pl.spatial(adata_measured, color=['{} (measured)'.format(gene) for gene in genes], frameon=False) # sc.pl.spatial(adata_predicted, color=['{} (predicted)'.format(gene) for gene in genes], frameon=False) @@ -443,7 +482,7 @@ def plot_genes_sc( errors="ignore", axis=1, ) - if return_figure==True: + if return_figure == True: return fig @@ -500,7 +539,10 @@ def plot_genes( fig.subplots_adjust(top=0.5) cmap = plt.get_cmap(cmap) - norm = mpl.colors.Normalize(vmin=0, vmax=1,) + norm = mpl.colors.Normalize( + vmin=0, + vmax=1, + ) cb1 = mpl.colorbar.ColorbarBase( ax, cmap=cmap, norm=norm, orientation="horizontal", label="Expression Level" @@ -516,7 +558,9 @@ def plot_genes( vs = np.array(adata_measured[:, gene].X).flatten() xs, ys, vs = ordered_predictions( - adata_measured.obs[x], adata_measured.obs[y], vs, + adata_measured.obs[x], + adata_measured.obs[y], + vs, ) if log: @@ -554,7 +598,7 @@ def quick_plot_gene( ): """ Utility function to quickly plot a gene in a AnnData structure ordered by intensity of the gene signal. - + Args: gene (str): Gene name. adata (AnnData): spot-by-gene spatial data. @@ -609,9 +653,9 @@ def plot_annotation_entropy(adata_map, annotation="cell_type"): def plot_test_scores(df_gene_score, bins=10, alpha=0.7): """ Plots gene level test scores with each gene's sparsity for mapping result. - + Args: - df_gene_score (Pandas dataframe): returned by compare_spatial_geneexp(adata_ge, adata_sp, adata_sc); + df_gene_score (Pandas dataframe): returned by compare_spatial_geneexp(adata_ge, adata_sp, adata_sc); with "gene names" as the index and "score", "sparsity_sc", "sparsity_sp", "sparsity_diff" as the columns bins (int or string): Optional. Default is 10. alpha (float): Optional. Ranges from 0-1, and controls the opacity. Default is 0.7. @@ -658,40 +702,40 @@ def plot_test_scores(df_gene_score, bins=10, alpha=0.7): ) plt.tight_layout() - + def plot_auc(df_all_genes, test_genes=None): """ Plots auc curve which is used to evaluate model performance. - + Args: - df_all_genes (Pandas dataframe): returned by compare_spatial_geneexp(adata_ge, adata_sp); + df_all_genes (Pandas dataframe): returned by compare_spatial_geneexp(adata_ge, adata_sp); test_genes (list): list of test genes, if not given, test_genes will be set to genes where 'is_training' field is False Returns: None """ metric_dict, ((pol_xs, pol_ys), (xs, ys)) = ut.eval_metric(df_all_genes, test_genes) - + fig = plt.figure() plt.figure(figsize=(6, 5)) - plt.plot(pol_xs, pol_ys, c='r') - sns.scatterplot(xs, ys, alpha=0.5, edgecolors='face') - + plt.plot(pol_xs, pol_ys, c="r") + sns.scatterplot(xs, ys, alpha=0.5, edgecolors="face") + plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.0]) - plt.gca().set_aspect(.5) - plt.xlabel('score') - plt.ylabel('spatial sparsity') - plt.tick_params(axis='both', labelsize=8) - plt.title('Prediction on test transcriptome') - - textstr = 'auc_score={}'.format(np.round(metric_dict['auc_score'], 3)) - props = dict(boxstyle='round', facecolor='wheat', alpha=0.3) + plt.gca().set_aspect(0.5) + plt.xlabel("score") + plt.ylabel("spatial sparsity") + plt.tick_params(axis="both", labelsize=8) + plt.title("Prediction on test transcriptome") + + textstr = "auc_score={}".format(np.round(metric_dict["auc_score"], 3)) + props = dict(boxstyle="round", facecolor="wheat", alpha=0.3) # place a text box in upper left in axes coords - plt.text(0.03, 0.1, textstr, fontsize=11, verticalalignment='top', bbox=props); + plt.text(0.03, 0.1, textstr, fontsize=11, verticalalignment="top", bbox=props) + - # Colors used in the manuscript for deterministic assignment. mapping_colors = { "L6 CT": (0.19215686274509805, 0.5098039215686274, 0.7411764705882353), diff --git a/tangram/utils.py b/tangram/utils.py index e92c38d..6a50916 100644 --- a/tangram/utils.py +++ b/tangram/utils.py @@ -819,3 +819,14 @@ def df_to_cell_types(df, cell_types): end_ind = j[i] cell_types_mapped[i].extend(j["centroids"][start_ind:end_ind].tolist()) return cell_types_mapped + + +@njit +def mat_cosine_similarity(V1,V2, axis = 0): + n_1 = np.sum(V1 * V1,axis = axis) ** 0.5 + n_2 = np.sum(V2 * V2,axis = axis) ** 0.5 + norms_sq = n_1 * n_2 + ewise = V1 * V2 + dot_unorm = np.sum(ewise,axis =axis) + cs = dot_unorm / norms_sq + return cs From d12d87a57d2559dad1ac84e0dcaac048fae2ca1d Mon Sep 17 00:00:00 2001 From: Alma Andersson Date: Thu, 24 Aug 2023 14:29:46 -0700 Subject: [PATCH 2/4] fix plot --- tangram/plot_utils.py | 42 ++++++++++++++++++++---------------------- tangram/utils.py | 1 + 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/tangram/plot_utils.py b/tangram/plot_utils.py index 07d6ae0..ff6ec60 100644 --- a/tangram/plot_utils.py +++ b/tangram/plot_utils.py @@ -2,7 +2,7 @@ This module includes plotting utility functions. """ import logging -from collections.abc import Collection +from collections.abc import Sequence import matplotlib as mpl import matplotlib.pyplot as plt @@ -338,6 +338,7 @@ def plot_genes_sc( genes, adata_measured, adata_predicted, + spatial_key: str | None = "spatial", x="x", y="y", spot_size=None, @@ -348,20 +349,20 @@ def plot_genes_sc( bw=False, return_figure=False, ): - - if not isinstance(genes,Collection): - genes = [genes] - + if isinstance(genes, str): + _genes = [genes] + else: + _genes = genes # remove df_plot in obs adata_measured.obs.drop( - ["{} (measured)".format(gene) for gene in genes], + ["{} (measured)".format(gene) for gene in _genes], inplace=True, errors="ignore", axis=1, ) adata_predicted.obs.drop( - ["{} (predicted)".format(gene) for gene in genes], + ["{} (predicted)".format(gene) for gene in _genes], inplace=True, errors="ignore", axis=1, @@ -378,13 +379,13 @@ def plot_genes_sc( # remove previous df_plot in obs adata_measured.obs.drop( - ["{} (measured)".format(gene) for gene in genes], + ["{} (measured)".format(gene) for gene in _genes], inplace=True, errors="ignore", axis=1, ) adata_predicted.obs.drop( - ["{} (predicted)".format(gene) for gene in genes], + ["{} (predicted)".format(gene) for gene in _genes], inplace=True, errors="ignore", axis=1, @@ -392,7 +393,7 @@ def plot_genes_sc( # construct df_plot data = [] - for ix, gene in enumerate(genes): + for ix, gene in enumerate(_genes): if gene not in adata_measured.var.index: data.append(np.zeros_like(np.array(adata_measured[:, 0].X).flatten())) else: @@ -400,23 +401,23 @@ def plot_genes_sc( df = pd.DataFrame( data=np.array(data).T, - columns=genes, + columns=_genes, index=adata_measured.obs.index, ) construct_obs_plot(df, adata_measured, suffix="measured") df = pd.DataFrame( - data=np.array(adata_predicted[:, genes].X), - columns=genes, + data=np.array(adata_predicted[:, _genes].X), + columns=_genes, index=adata_predicted.obs.index, ) construct_obs_plot(df, adata_predicted, perc=perc, suffix="predicted") - fig = plt.figure(figsize=(7, len(genes) * 3.5)) - gs = GridSpec(len(genes), 2, figure=fig) + fig = plt.figure(figsize=(7, len(_genes) * 3.5)) + gs = GridSpec(len(_genes), 2, figure=fig) # non visium data - if "spatial" not in adata_measured.obsm.keys(): + if spatial_key not in adata_measured.obsm.keys(): # add spatial coordinates to obsm of spatial data coords = [ [x, y] @@ -438,7 +439,7 @@ def plot_genes_sc( "Spot Size and Scale Factor cannot be None when ad_sp.uns['spatial'] does not exist" ) - for ix, gene in enumerate(genes): + for ix, gene in enumerate(_genes): ax_m = fig.add_subplot(gs[ix, 0]) sc.pl.spatial( adata_measured, @@ -466,18 +467,15 @@ def plot_genes_sc( bw=bw, ) - # sc.pl.spatial(adata_measured, color=['{} (measured)'.format(gene) for gene in genes], frameon=False) - # sc.pl.spatial(adata_predicted, color=['{} (predicted)'.format(gene) for gene in genes], frameon=False) - # remove df_plot in obs adata_measured.obs.drop( - ["{} (measured)".format(gene) for gene in genes], + ["{} (measured)".format(gene) for gene in _genes], inplace=True, errors="ignore", axis=1, ) adata_predicted.obs.drop( - ["{} (predicted)".format(gene) for gene in genes], + ["{} (predicted)".format(gene) for gene in _genes], inplace=True, errors="ignore", axis=1, diff --git a/tangram/utils.py b/tangram/utils.py index 6a50916..791ec5f 100644 --- a/tangram/utils.py +++ b/tangram/utils.py @@ -17,6 +17,7 @@ import warnings from sklearn.metrics import auc +from numba import njit # import torch # from torch.nn.functional import cosine_similarity From 75838dee13ccd6ee72c98505846280eb94159d23 Mon Sep 17 00:00:00 2001 From: Alma Andersson Date: Thu, 24 Aug 2023 14:31:39 -0700 Subject: [PATCH 3/4] lower genes --- tangram/plot_utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tangram/plot_utils.py b/tangram/plot_utils.py index ff6ec60..4a1dde2 100644 --- a/tangram/plot_utils.py +++ b/tangram/plot_utils.py @@ -348,12 +348,16 @@ def plot_genes_sc( alpha_img=1.0, bw=False, return_figure=False, + lower_gene_names: bool = False, ): if isinstance(genes, str): _genes = [genes] else: _genes = genes + if lower_gen_names: + _genes = [g.lower() for g in _genes] + # remove df_plot in obs adata_measured.obs.drop( ["{} (measured)".format(gene) for gene in _genes], From 92872b172b09c3e2b6fba6c95183074bc2815c3b Mon Sep 17 00:00:00 2001 From: Alma Andersson Date: Thu, 24 Aug 2023 14:32:43 -0700 Subject: [PATCH 4/4] plot --- tangram/plot_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tangram/plot_utils.py b/tangram/plot_utils.py index 4a1dde2..2db47a9 100644 --- a/tangram/plot_utils.py +++ b/tangram/plot_utils.py @@ -348,14 +348,14 @@ def plot_genes_sc( alpha_img=1.0, bw=False, return_figure=False, - lower_gene_names: bool = False, + lower_gene_names: bool = False, ): if isinstance(genes, str): _genes = [genes] else: _genes = genes - if lower_gen_names: + if lower_gene_names: _genes = [g.lower() for g in _genes] # remove df_plot in obs