From d92a6085e4d246f75e0295b24b200b1f75ed8b3b Mon Sep 17 00:00:00 2001
From: Alma Andersson <andera29@gene.com>
Date: Thu, 24 Aug 2023 14:09:30 -0700
Subject: [PATCH 1/4] Vectorize gene cosine similarity; reformat plot_utils and add spatial_key

---
 tangram/mapping_utils.py |   5 +-
 tangram/plot_utils.py    | 210 +++++++++++++++++++++++----------------
 tangram/utils.py         |  11 ++
 3 files changed, 139 insertions(+), 87 deletions(-)

diff --git a/tangram/mapping_utils.py b/tangram/mapping_utils.py
index bb39494..237aad7 100644
--- a/tangram/mapping_utils.py
+++ b/tangram/mapping_utils.py
@@ -355,10 +355,7 @@ def map_cells_to_space(
 
     # Annotate cosine similarity of each training gene
     G_predicted = adata_map.X.T @ S
-    cos_sims = []
-    for v1, v2 in zip(G.T, G_predicted.T):
-        norm_sq = np.linalg.norm(v1) * np.linalg.norm(v2)
-        cos_sims.append((v1 @ v2) / norm_sq)
+    cos_sims = ut.mat_cosine_similarity(G,G_predicted)
 
     df_cs = pd.DataFrame(cos_sims, training_genes, columns=["train_score"])
     df_cs = df_cs.sort_values(by="train_score", ascending=False)
diff --git a/tangram/plot_utils.py b/tangram/plot_utils.py
index 288022e..07d6ae0 100644
--- a/tangram/plot_utils.py
+++ b/tangram/plot_utils.py
@@ -1,22 +1,22 @@
 """
 This module includes plotting utility functions.
 """
-import numpy as np
-import matplotlib.pyplot as plt
 import logging
-import seaborn as sns
-from scipy.stats import entropy
+from collections.abc import Collection
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
 import scanpy as sc
+import seaborn as sns
+from matplotlib.gridspec import GridSpec
 from scipy.sparse.csc import csc_matrix
 from scipy.sparse.csr import csr_matrix
+from scipy.stats import entropy
 
-from . import utils as ut
 from . import mapping_utils as mu
-
-import pandas as pd
-import logging
-import matplotlib as mpl
-from matplotlib.gridspec import GridSpec
+from . import utils as ut
 
 
 def q_value(data, perc):
@@ -104,7 +104,7 @@ def plot_gene_sparsity(
         adata_1 (AnnData): Input data
         adata_2 (AnnData): Input data
         xlabel (str): Optional. For setting the xlabel in the plot. Default is 'adata_1'.
-        ylabel (str): Optional. For setting the ylabel in the plot. Default is 'adata_2'.  
+        ylabel (str): Optional. For setting the ylabel in the plot. Default is 'adata_2'.
         genes (list): Optional. List of genes to use. If `None`, all genes are used.
         s (float): Optional. Controls the size of marker. Default is 1.
 
@@ -138,7 +138,7 @@ def ordered_predictions(xs, ys, preds, reverse=False):
         ys (Pandas series): Sequence of y coordinates (floats).
         preds (Pandas series): Sequence of spatial prediction.
         reverse (bool): Optional. False will sort ascending, True will sort descending. Default is False.
-        
+
     Returns:
         Returns the ordered xs, ys, preds.
     """
@@ -173,41 +173,59 @@ def construct_obs_plot(df_plot, adata, perc=0, suffix=None):
 
 
 def plot_cell_annotation_sc(
-    adata_sp, 
-    annotation_list, 
-    x="x", 
-    y="y", 
-    spot_size=None, 
-    scale_factor=None, 
+    adata_sp,
+    annotation_list,
+    x="x",
+    y="y",
+    spot_size=None,
+    scale_factor=None,
     perc=0,
     alpha_img=1.0,
     bw=False,
-    ax=None
+    ax=None,
+    spatial_key="spatial",  # obsm key or None; kept last to preserve positional order
 ):
-        
     # remove previous df_plot in obs
     adata_sp.obs.drop(annotation_list, inplace=True, errors="ignore", axis=1)
 
     # construct df_plot
     df = adata_sp.obsm["tangram_ct_pred"][annotation_list]
     construct_obs_plot(df, adata_sp, perc=perc)
-    
-    #non visium data 
-    if 'spatial' not in adata_sp.obsm.keys():
-        #add spatial coordinates to obsm of spatial data 
-        coords = [[x,y] for x,y in zip(adata_sp.obs[x].values,adata_sp.obs[y].values)]
-        adata_sp.obsm['spatial'] = np.array(coords)
-    
-    if 'spatial' not in adata_sp.uns.keys() and spot_size == None and scale_factor == None:
-        raise ValueError("Spot Size and Scale Factor cannot be None when ad_sp.uns['spatial'] does not exist")
-    
-    #REVIEW
-    if 'spatial' in adata_sp.uns.keys() and spot_size != None and scale_factor != None:
-        raise ValueError("Spot Size and Scale Factor should be None when ad_sp.uns['spatial'] exists")
-    
+
+    # Resolve which obsm entry holds the coordinates: `spatial_key` when given,
+    # otherwise the "spatial" entry that sc.pl.spatial plots by default.
+    basis = "spatial" if spatial_key is None else spatial_key
+
+    # non visium data
+    if basis not in adata_sp.obsm:
+        # add spatial coordinates to obsm of spatial data, under the same key
+        # that was just checked and that is plotted below
+        adata_sp.obsm[basis] = adata_sp.obs[[x, y]].to_numpy()
+
+    # without uns['spatial'], spot_size or scale_factor must be supplied
+    if "spatial" not in adata_sp.uns and spot_size is None and scale_factor is None:
+        raise ValueError(
+            "Spot Size and Scale Factor cannot be None when ad_sp.uns['spatial'] does not exist"
+        )
+
+    # REVIEW
+    if "spatial" in adata_sp.uns and spot_size is not None and scale_factor is not None:
+        raise ValueError(
+            "Spot Size and Scale Factor should be None when ad_sp.uns['spatial'] exists"
+        )
+
     sc.pl.spatial(
-        adata_sp, color=annotation_list, cmap="viridis", show=False, frameon=False, spot_size=spot_size,
-        scale_factor=scale_factor, alpha_img=alpha_img, bw=bw, ax=ax
+        adata_sp,
+        basis=basis,
+        color=annotation_list,
+        cmap="viridis",
+        show=False,
+        frameon=False,
+        spot_size=spot_size,
+        scale_factor=scale_factor,
+        alpha_img=alpha_img,
+        bw=bw,
+        ax=ax,
     )
 
     adata_sp.obs.drop(annotation_list, inplace=True, errors="ignore", axis=1)
@@ -268,7 +286,10 @@ def plot_cell_annotation(
     fig.subplots_adjust(top=0.5)
 
     cmap = plt.get_cmap(cmap)
-    norm = mpl.colors.Normalize(vmin=0, vmax=1,)
+    norm = mpl.colors.Normalize(
+        vmin=0,
+        vmax=1,
+    )
 
     cb1 = mpl.colorbar.ColorbarBase(
         ax, cmap=cmap, norm=norm, orientation="horizontal", label="Probability"
@@ -314,20 +335,24 @@ def plot_cell_annotation(
 
 
 def plot_genes_sc(
-    genes, 
-    adata_measured, 
+    genes,
+    adata_measured,
     adata_predicted,
     x="x",
-    y = "y",
-    spot_size=None, 
-    scale_factor=None, 
-    cmap="inferno", 
+    y="y",
+    spot_size=None,
+    scale_factor=None,
+    cmap="inferno",
     perc=0,
     alpha_img=1.0,
     bw=False,
-    return_figure=False
+    return_figure=False,
 ):
 
+    if not isinstance(genes,Collection):
+        genes = [genes]
+
+
     # remove df_plot in obs
     adata_measured.obs.drop(
         ["{} (measured)".format(gene) for gene in genes],
@@ -374,7 +399,9 @@ def plot_genes_sc(
             data.append(np.array(adata_measured[:, gene].X).flatten())
 
     df = pd.DataFrame(
-        data=np.array(data).T, columns=genes, index=adata_measured.obs.index,
+        data=np.array(data).T,
+        columns=genes,
+        index=adata_measured.obs.index,
     )
     construct_obs_plot(df, adata_measured, suffix="measured")
 
@@ -387,18 +414,30 @@ def plot_genes_sc(
 
     fig = plt.figure(figsize=(7, len(genes) * 3.5))
     gs = GridSpec(len(genes), 2, figure=fig)
-    
-    #non visium data
-    if 'spatial' not in adata_measured.obsm.keys():
-        #add spatial coordinates to obsm of spatial data 
-        coords = [[x,y] for x,y in zip(adata_measured.obs[x].values,adata_measured.obs[y].values)]
-        adata_measured.obsm['spatial'] = np.array(coords)
-        coords = [[x,y] for x,y in zip(adata_predicted.obs[x].values,adata_predicted.obs[y].values)]
-        adata_predicted.obsm['spatial'] = np.array(coords)
-
-    if ("spatial" not in adata_measured.uns.keys()) and (spot_size==None and scale_factor==None):
-        raise ValueError("Spot Size and Scale Factor cannot be None when ad_sp.uns['spatial'] does not exist")
-        
+
+    # non visium data
+    if "spatial" not in adata_measured.obsm.keys():
+        # add spatial coordinates to obsm of spatial data
+        coords = [
+            [x, y]
+            for x, y in zip(adata_measured.obs[x].values, adata_measured.obs[y].values)
+        ]
+        adata_measured.obsm["spatial"] = np.array(coords)
+        coords = [
+            [x, y]
+            for x, y in zip(
+                adata_predicted.obs[x].values, adata_predicted.obs[y].values
+            )
+        ]
+        adata_predicted.obsm["spatial"] = np.array(coords)
+
+    if ("spatial" not in adata_measured.uns.keys()) and (
+        spot_size == None and scale_factor == None
+    ):
+        raise ValueError(
+            "Spot Size and Scale Factor cannot be None when ad_sp.uns['spatial'] does not exist"
+        )
+
     for ix, gene in enumerate(genes):
         ax_m = fig.add_subplot(gs[ix, 0])
         sc.pl.spatial(
@@ -411,7 +450,7 @@ def plot_genes_sc(
             show=False,
             cmap=cmap,
             alpha_img=alpha_img,
-            bw=bw
+            bw=bw,
         )
         ax_p = fig.add_subplot(gs[ix, 1])
         sc.pl.spatial(
@@ -424,9 +463,9 @@ def plot_genes_sc(
             show=False,
             cmap=cmap,
             alpha_img=alpha_img,
-            bw=bw
+            bw=bw,
         )
-        
+
     #     sc.pl.spatial(adata_measured, color=['{} (measured)'.format(gene) for gene in genes], frameon=False)
     #     sc.pl.spatial(adata_predicted, color=['{} (predicted)'.format(gene) for gene in genes], frameon=False)
 
@@ -443,7 +482,7 @@ def plot_genes_sc(
         errors="ignore",
         axis=1,
     )
-    if return_figure==True:
+    if return_figure == True:
         return fig
 
 
@@ -500,7 +539,10 @@ def plot_genes(
     fig.subplots_adjust(top=0.5)
 
     cmap = plt.get_cmap(cmap)
-    norm = mpl.colors.Normalize(vmin=0, vmax=1,)
+    norm = mpl.colors.Normalize(
+        vmin=0,
+        vmax=1,
+    )
 
     cb1 = mpl.colorbar.ColorbarBase(
         ax, cmap=cmap, norm=norm, orientation="horizontal", label="Expression Level"
@@ -516,7 +558,9 @@ def plot_genes(
             vs = np.array(adata_measured[:, gene].X).flatten()
 
         xs, ys, vs = ordered_predictions(
-            adata_measured.obs[x], adata_measured.obs[y], vs,
+            adata_measured.obs[x],
+            adata_measured.obs[y],
+            vs,
         )
 
         if log:
@@ -554,7 +598,7 @@ def quick_plot_gene(
 ):
     """
     Utility function to quickly plot a gene in a AnnData structure ordered by intensity of the gene signal.
-    
+
     Args:
         gene (str): Gene name.
         adata (AnnData): spot-by-gene spatial data.
@@ -609,9 +653,9 @@ def plot_annotation_entropy(adata_map, annotation="cell_type"):
 def plot_test_scores(df_gene_score, bins=10, alpha=0.7):
     """
     Plots gene level test scores with each gene's sparsity for mapping result.
-    
+
     Args:
-        df_gene_score (Pandas dataframe): returned by compare_spatial_geneexp(adata_ge, adata_sp, adata_sc); 
+        df_gene_score (Pandas dataframe): returned by compare_spatial_geneexp(adata_ge, adata_sp, adata_sc);
                        with "gene names" as the index and "score", "sparsity_sc", "sparsity_sp", "sparsity_diff" as the columns
         bins (int or string): Optional. Default is 10.
         alpha (float): Optional. Ranges from 0-1, and controls the opacity. Default is 0.7.
@@ -658,40 +702,40 @@ def plot_test_scores(df_gene_score, bins=10, alpha=0.7):
     )
     plt.tight_layout()
 
-    
+
 def plot_auc(df_all_genes, test_genes=None):
     """
         Plots auc curve which is used to evaluate model performance.
-    
+
     Args:
-        df_all_genes (Pandas dataframe): returned by compare_spatial_geneexp(adata_ge, adata_sp); 
+        df_all_genes (Pandas dataframe): returned by compare_spatial_geneexp(adata_ge, adata_sp);
         test_genes (list): list of test genes, if not given, test_genes will be set to genes where 'is_training' field is False
 
     Returns:
         None
     """
     metric_dict, ((pol_xs, pol_ys), (xs, ys)) = ut.eval_metric(df_all_genes, test_genes)
-    
+
     fig = plt.figure()
     plt.figure(figsize=(6, 5))
 
-    plt.plot(pol_xs, pol_ys, c='r')
-    sns.scatterplot(xs, ys, alpha=0.5, edgecolors='face')
-        
+    plt.plot(pol_xs, pol_ys, c="r")
+    sns.scatterplot(xs, ys, alpha=0.5, edgecolors="face")
+
     plt.xlim([0.0, 1.0])
     plt.ylim([0.0, 1.0])
-    plt.gca().set_aspect(.5)
-    plt.xlabel('score')
-    plt.ylabel('spatial sparsity')
-    plt.tick_params(axis='both', labelsize=8)
-    plt.title('Prediction on test transcriptome')
-    
-    textstr = 'auc_score={}'.format(np.round(metric_dict['auc_score'], 3))
-    props = dict(boxstyle='round', facecolor='wheat', alpha=0.3)
+    plt.gca().set_aspect(0.5)
+    plt.xlabel("score")
+    plt.ylabel("spatial sparsity")
+    plt.tick_params(axis="both", labelsize=8)
+    plt.title("Prediction on test transcriptome")
+
+    textstr = "auc_score={}".format(np.round(metric_dict["auc_score"], 3))
+    props = dict(boxstyle="round", facecolor="wheat", alpha=0.3)
     # place a text box in upper left in axes coords
-    plt.text(0.03, 0.1, textstr, fontsize=11, verticalalignment='top', bbox=props);
+    plt.text(0.03, 0.1, textstr, fontsize=11, verticalalignment="top", bbox=props)
+
 
-    
 # Colors used in the manuscript for deterministic assignment.
 mapping_colors = {
     "L6 CT": (0.19215686274509805, 0.5098039215686274, 0.7411764705882353),
diff --git a/tangram/utils.py b/tangram/utils.py
index e92c38d..6a50916 100644
--- a/tangram/utils.py
+++ b/tangram/utils.py
@@ -819,3 +819,14 @@ def df_to_cell_types(df, cell_types):
             end_ind = j[i]
             cell_types_mapped[i].extend(j["centroids"][start_ind:end_ind].tolist())
     return cell_types_mapped
+
+
+@njit
+def mat_cosine_similarity(V1, V2, axis=0):
+    """Return the cosine similarity of V1 and V2 vectors taken along `axis`."""
+    # Euclidean norm of every vector in each matrix
+    norm_1 = np.sum(V1 * V1, axis=axis) ** 0.5
+    norm_2 = np.sum(V2 * V2, axis=axis) ** 0.5
+    # un-normalised dot product of matching vectors
+    dot = np.sum(V1 * V2, axis=axis)
+    return dot / (norm_1 * norm_2)

From d12d87a57d2559dad1ac84e0dcaac048fae2ca1d Mon Sep 17 00:00:00 2001
From: Alma Andersson <andera29@gene.com>
Date: Thu, 24 Aug 2023 14:29:46 -0700
Subject: [PATCH 2/4] plot_genes_sc: accept a single gene name and add spatial_key; import njit

---
 tangram/plot_utils.py | 42 ++++++++++++++++++++----------------------
 tangram/utils.py      |  1 +
 2 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/tangram/plot_utils.py b/tangram/plot_utils.py
index 07d6ae0..ff6ec60 100644
--- a/tangram/plot_utils.py
+++ b/tangram/plot_utils.py
@@ -2,7 +2,7 @@
 This module includes plotting utility functions.
 """
 import logging
-from collections.abc import Collection
+from collections.abc import Sequence
 
 import matplotlib as mpl
 import matplotlib.pyplot as plt
@@ -338,6 +338,7 @@ def plot_genes_sc(
     genes,
     adata_measured,
     adata_predicted,
+    spatial_key="spatial",  # str or None; un-annotated so the module imports on Python < 3.10
     x="x",
     y="y",
     spot_size=None,
@@ -348,20 +349,20 @@ def plot_genes_sc(
     bw=False,
     return_figure=False,
 ):
-
-    if not isinstance(genes,Collection):
-        genes = [genes]
-
+    if isinstance(genes, str):
+        _genes = [genes]
+    else:
+        _genes = genes
 
     # remove df_plot in obs
     adata_measured.obs.drop(
-        ["{} (measured)".format(gene) for gene in genes],
+        ["{} (measured)".format(gene) for gene in _genes],
         inplace=True,
         errors="ignore",
         axis=1,
     )
     adata_predicted.obs.drop(
-        ["{} (predicted)".format(gene) for gene in genes],
+        ["{} (predicted)".format(gene) for gene in _genes],
         inplace=True,
         errors="ignore",
         axis=1,
@@ -378,13 +379,13 @@ def plot_genes_sc(
 
     # remove previous df_plot in obs
     adata_measured.obs.drop(
-        ["{} (measured)".format(gene) for gene in genes],
+        ["{} (measured)".format(gene) for gene in _genes],
         inplace=True,
         errors="ignore",
         axis=1,
     )
     adata_predicted.obs.drop(
-        ["{} (predicted)".format(gene) for gene in genes],
+        ["{} (predicted)".format(gene) for gene in _genes],
         inplace=True,
         errors="ignore",
         axis=1,
@@ -392,7 +393,7 @@ def plot_genes_sc(
 
     # construct df_plot
     data = []
-    for ix, gene in enumerate(genes):
+    for ix, gene in enumerate(_genes):
         if gene not in adata_measured.var.index:
             data.append(np.zeros_like(np.array(adata_measured[:, 0].X).flatten()))
         else:
@@ -400,23 +401,23 @@ def plot_genes_sc(
 
     df = pd.DataFrame(
         data=np.array(data).T,
-        columns=genes,
+        columns=_genes,
         index=adata_measured.obs.index,
     )
     construct_obs_plot(df, adata_measured, suffix="measured")
 
     df = pd.DataFrame(
-        data=np.array(adata_predicted[:, genes].X),
-        columns=genes,
+        data=np.array(adata_predicted[:, _genes].X),
+        columns=_genes,
         index=adata_predicted.obs.index,
     )
     construct_obs_plot(df, adata_predicted, perc=perc, suffix="predicted")
 
-    fig = plt.figure(figsize=(7, len(genes) * 3.5))
-    gs = GridSpec(len(genes), 2, figure=fig)
+    fig = plt.figure(figsize=(7, len(_genes) * 3.5))
+    gs = GridSpec(len(_genes), 2, figure=fig)
 
     # non visium data
-    if "spatial" not in adata_measured.obsm.keys():
+    if spatial_key not in adata_measured.obsm.keys():
         # add spatial coordinates to obsm of spatial data
         coords = [
             [x, y]
@@ -438,7 +439,7 @@ def plot_genes_sc(
             "Spot Size and Scale Factor cannot be None when ad_sp.uns['spatial'] does not exist"
         )
 
-    for ix, gene in enumerate(genes):
+    for ix, gene in enumerate(_genes):
         ax_m = fig.add_subplot(gs[ix, 0])
         sc.pl.spatial(
             adata_measured,
@@ -466,18 +467,15 @@ def plot_genes_sc(
             bw=bw,
         )
 
-    #     sc.pl.spatial(adata_measured, color=['{} (measured)'.format(gene) for gene in genes], frameon=False)
-    #     sc.pl.spatial(adata_predicted, color=['{} (predicted)'.format(gene) for gene in genes], frameon=False)
-
     # remove df_plot in obs
     adata_measured.obs.drop(
-        ["{} (measured)".format(gene) for gene in genes],
+        ["{} (measured)".format(gene) for gene in _genes],
         inplace=True,
         errors="ignore",
         axis=1,
     )
     adata_predicted.obs.drop(
-        ["{} (predicted)".format(gene) for gene in genes],
+        ["{} (predicted)".format(gene) for gene in _genes],
         inplace=True,
         errors="ignore",
         axis=1,
diff --git a/tangram/utils.py b/tangram/utils.py
index 6a50916..791ec5f 100644
--- a/tangram/utils.py
+++ b/tangram/utils.py
@@ -17,6 +17,7 @@
 import warnings
 
 from sklearn.metrics import auc
+from numba import njit
 
 # import torch
 # from torch.nn.functional import cosine_similarity

From 75838dee13ccd6ee72c98505846280eb94159d23 Mon Sep 17 00:00:00 2001
From: Alma Andersson <andera29@gene.com>
Date: Thu, 24 Aug 2023 14:31:39 -0700
Subject: [PATCH 3/4] plot_genes_sc: add lower_gene_names option

---
 tangram/plot_utils.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tangram/plot_utils.py b/tangram/plot_utils.py
index ff6ec60..4a1dde2 100644
--- a/tangram/plot_utils.py
+++ b/tangram/plot_utils.py
@@ -348,12 +348,16 @@ def plot_genes_sc(
     alpha_img=1.0,
     bw=False,
     return_figure=False,
+        lower_gene_names: bool = False,
 ):
     if isinstance(genes, str):
         _genes = [genes]
     else:
         _genes = genes
 
+    if lower_gen_names:
+        _genes = [g.lower() for g in _genes]
+
     # remove df_plot in obs
     adata_measured.obs.drop(
         ["{} (measured)".format(gene) for gene in _genes],

From 92872b172b09c3e2b6fba6c95183074bc2815c3b Mon Sep 17 00:00:00 2001
From: Alma Andersson <andera29@gene.com>
Date: Thu, 24 Aug 2023 14:32:43 -0700
Subject: [PATCH 4/4] plot_genes_sc: fix lower_gene_names typo and indentation

---
 tangram/plot_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tangram/plot_utils.py b/tangram/plot_utils.py
index 4a1dde2..2db47a9 100644
--- a/tangram/plot_utils.py
+++ b/tangram/plot_utils.py
@@ -348,14 +348,14 @@ def plot_genes_sc(
     alpha_img=1.0,
     bw=False,
     return_figure=False,
-        lower_gene_names: bool = False,
+    lower_gene_names: bool = False,
 ):
     if isinstance(genes, str):
         _genes = [genes]
     else:
         _genes = genes
 
-    if lower_gen_names:
+    if lower_gene_names:
         _genes = [g.lower() for g in _genes]
 
     # remove df_plot in obs