Commit 3e8eca6

Merge pull request #23 from broadinstitute/log_metric

Log metric

ziqlu0722 authored Mar 8, 2021
2 parents 557a635 + 5691078 commit 3e8eca6

Showing 5 changed files with 56 additions and 40 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -17,6 +17,8 @@ __pycache__
 # except for test datasets
 !test_ad_sc_readytomap.h5ad
 !test_ad_sp_readytomap.h5ad
+!test_2_ad_sc_readytomap.h5ad
+!test_2_ad_sp_readytomap.h5ad
 example/data/

 # Distribution / packaging
50 changes: 26 additions & 24 deletions tangram/mapping_optimizer.py
@@ -11,6 +11,8 @@
 import torch
 from torch.nn.functional import softmax, cosine_similarity

+from comet_ml import Experiment
+

 class Mapper:
     """
@@ -101,37 +103,29 @@ def _loss_fn(self, verbose=True):
         kl_reg = (density_term / self.lambda_d).tolist() if density_term is not None else np.nan
         vg_reg = (vg_term / self.lambda_g2).tolist()

-        if verbose:
-
-            if not np.isnan(kl_reg) and not np.isnan(vg_reg):
-                msg = 'Score: {:.3f}, KL reg: {:.3f}, VG reg: {:.3f}'.format(
-                    main_loss, kl_reg, vg_reg
-                )
-
-            elif np.isnan(kl_reg) and np.isnan(vg_reg):
-                msg = 'Score: {:.3f}'.format(
-                    main_loss
-                )
-
-            elif np.isnan(kl_reg):
-                msg = 'Score: {:.3f}, VG reg: {:.3f}'.format(
-                    main_loss, vg_reg
-                )
-
-            elif np.isnan(vg_reg):
-                msg = 'Score: {:.3f}, KL reg: {:.3f}'.format(
-                    main_loss, kl_reg
-                )
-
-            print(msg)
+        entropy_reg = (regularizer_term / self.lambda_r).tolist()
+
+        if verbose:
+
+            term_numbers = [main_loss, kl_reg, vg_reg, entropy_reg]
+            term_names = ['Score', 'KL reg', 'VG reg', 'Entropy reg']
+
+            d = dict(zip(term_names, term_numbers))
+            clean_dict = {k: d[k] for k in d if not np.isnan(d[k])}
+            msg = []
+            for k in clean_dict:
+                m = '{}: {:.3f}'.format(k, clean_dict[k])
+                msg.append(m)
+
+            print(str(msg).replace("[", "").replace("]", "").replace("'",""))

         total_loss = - expression_term - regularizer_term
         if density_term is not None:
             total_loss = total_loss + density_term

-        return total_loss, main_loss, vg_reg, kl_reg
+        return total_loss, main_loss, vg_reg, kl_reg, entropy_reg

-    def train(self, num_epochs, learning_rate=0.1, print_each=100):
+    def train(self, num_epochs, learning_rate=0.1, print_each=100, experiment=None):
         """
         Runs the optimizer and returns the mapping outcome.
         Args:
@@ -148,19 +142,27 @@ def train(self, num_epochs, learning_rate=0.1, print_each=100):
         if print_each:
             logging.info(f'Printing scores every {print_each} epochs.')

-        keys = ['total_loss', 'main_loss', 'vg_reg', 'gv_reg']
-        values = [[] for i in range(4)]
+        keys = ['total_loss', 'main_loss', 'vg_reg', 'kl_reg', 'entropy_reg']
+        values = [[] for i in range(len(keys))]
         training_history = {key:value for key, value in zip(keys, values)}
         for t in range(num_epochs):
             if print_each is None or t % print_each != 0:
                 run_loss = self._loss_fn(verbose=False)
             else:
                 run_loss = self._loss_fn(verbose=True)

             loss = run_loss[0]
             training_history['total_loss'].append(np.float64(loss))
             training_history['main_loss'].append(np.float64(run_loss[1]))
             training_history['vg_reg'].append(np.float64(run_loss[2]))
-            training_history['gv_reg'].append(np.float64(run_loss[3]))
+            training_history['kl_reg'].append(np.float64(run_loss[3]))
+            training_history['entropy_reg'].append(np.float64(run_loss[4]))
+
+            if experiment:
+                experiment.log_metric('main_loss', np.float64(run_loss[1]))
+                experiment.log_metric('vg_reg', np.float64(run_loss[2]))
+                experiment.log_metric('kl_reg', np.float64(run_loss[3]))
+                experiment.log_metric('entropy_reg', np.float64(run_loss[4]))

             optimizer.zero_grad()
             loss.backward()
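With this commit, `_loss_fn` returns five terms and `train` accumulates each one per epoch in `training_history`, which ends up in `adata_map.uns['training_history']` (as read by the `cross_val` change below). A minimal sketch of inspecting the history after a mapping run; the plotting code is illustrative only and not part of this commit:

import matplotlib.pyplot as plt

# Each entry of training_history is a list of np.float64, one value per epoch.
history = adata_map.uns['training_history']
for key in ['total_loss', 'main_loss', 'vg_reg', 'kl_reg', 'entropy_reg']:
    plt.plot(history[key], label=key)  # disabled regularizers append NaN and plot as gaps
plt.xlabel('epoch')
plt.ylabel('loss term')
plt.legend()
plt.show()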
5 changes: 4 additions & 1 deletion tangram/mapping_utils.py
@@ -94,7 +94,8 @@ def adata_to_cluster_expression(adata, cluster_label, scale=True, add_density=Tr
 def map_cells_to_space(adata_cells, adata_space, mode='cells', adata_map=None,
                        device='cuda:0', learning_rate=0.1, num_epochs=1000, d=None,
                        cluster_label=None, scale=True, lambda_d=0, lambda_g1=1, lambda_g2=0, lambda_r=0,
-                       random_state=None, verbose=True):
+                       random_state=None, verbose=True, experiment=None,
+                       ):
     """
     Map single cell data (`adata_1`) on spatial data (`adata_2`). If `adata_map`
     is provided, resume from previous mapping.
@@ -105,6 +106,7 @@ def map_cells_to_space(adata_cells, adata_space, mode='cells', adata_map=None,
     :param lambda_g1 (float): Optional. Hyperparameter for the gene-voxel similarity term of the optimizer. Default is 1.
     :param lambda_g2 (float): Optional. Hyperparameter for the voxel-gene similarity term of the optimizer. Default is 1.
     :param lambda_r (float): Optional. Entropy regularizer for the learned mapping matrix. A higher value promotes the mapping probabilities of each cell to peak over a narrow portion of space. lambda_r = 0 corresponds to no entropy regularizer. Default is 0.
+    :param experiment: Optional. comet-ml Experiment object used to log training metrics.
     """

     # check invalid values for arguments
@@ -199,6 +201,7 @@ def map_cells_to_space(adata_cells, adata_space, mode='cells', adata_map=None,
         learning_rate=learning_rate,
         num_epochs=num_epochs,
         print_each=print_each,
+        experiment=experiment,
     )

     logging.info('Saving results..')
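A hedged usage sketch of the new `experiment` argument; the project name is a made-up placeholder, the import alias is assumed, and comet-ml falls back to the COMET_API_KEY environment variable when no api_key is passed:

from comet_ml import Experiment
from tangram import mapping_utils as mu  # assumed import path

experiment = Experiment(project_name="tangram-log-metric")  # hypothetical project name

ad_map = mu.map_cells_to_space(
    ad_sc, ad_sp,           # single cell and spatial AnnData objects
    mode='cells',
    num_epochs=500,
    experiment=experiment,  # per-epoch loss terms are sent via experiment.log_metric
)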
36 changes: 23 additions & 13 deletions tangram/utils.py
@@ -131,6 +131,17 @@ def project_genes(adata_map, adata_sc, cluster_label=None, scale=True):
     Returns a spot-by-gene AnnData containing spatial gene
     expression from the single cell data.
     """
+
+    adata_sc = adata_sc.copy()
+
+    # lower-case the var index so gene names align between datasets
+    adata_sc.var.index = [g.lower() for g in adata_sc.var.index]
+
+    adata_sc.var_names_make_unique()
+
+    # remove all-zero-valued genes
+    sc.pp.filter_genes(adata_sc, min_cells=1)
+
     if cluster_label:
         adata_sc = mu.adata_to_cluster_expression(adata_sc, cluster_label, scale=scale)
@@ -243,17 +254,16 @@ def cross_val(ad_sc,
         scale: bool, whether to weight the input single cell cluster data by the number of cells per cluster; only valid when cluster_label is not None
         mode: string, cross validation mode; 'loo' and 'kfold' are supported
         return_gene_pred: bool, whether to return predicted and true spatial expression for the test genes; only applicable in 'loo' mode, default is False
-        experiment: bool, experiment object in comet-ml for logging training in comet-ml
+        experiment: comet-ml Experiment object used to log training metrics
     Returns:
         cv_dict: dict, a dictionary containing cross validation information (hyperparameters, average test and train scores, etc.)
         (df_test_gene_pred, df_test_gene_true): tuple, returned only when return_gene_pred is True and mode is 'loo'
     """

-    if verbose==False:
-        logger_root = logging.getLogger()
-        logger_root.disabled=True
-        logger_ann = logging.getLogger("anndata")
-        logger_ann.disabled = True
+    logger_root = logging.getLogger()
+    logger_root.disabled=True
+    logger_ann = logging.getLogger("anndata")
+    logger_ann.disabled = True

     test_genes_list = []
     test_pred_list = []
@@ -276,7 +286,7 @@ def cross_val(ad_sc,
             lambda_g2=lambda_g2,
             lambda_r=lambda_r,
             random_state=random_state,
-            verbose=verbose,
+            verbose=False,
         )

         # project on space
@@ -290,16 +300,16 @@ def cross_val(ad_sc,
         # output scores
         df_g = compare_spatial_geneexp(ad_ge, ad_sp)
         test_score = df_g[df_g['is_training'] == False]['score'].mean()
-        train_score = df_g[df_g['is_training'] == True]['score'].mean()
+        train_score = list(adata_map.uns['training_history']['main_loss'])[-1]

         # output avg score
         test_genes_list.append(test_genes)
         test_score_list.append(test_score)
         train_score_list.append(train_score)

-        logging.info(
-            "cv set: {}----train score: {:.3f}----test score: {:.3f}\n".format(curr_cv_set, train_score, test_score)
-        )
+        if verbose == True:
+            msg = "cv set: {}----train score: {:.3f}----test score: {:.3f}".format(curr_cv_set, train_score, test_score)
+            print(msg)

         if experiment:
             experiment.log_metric('test_score_{}'.format(curr_cv_set), test_score)
@@ -317,8 +327,8 @@ def cross_val(ad_sc,
                'avg_test_score': avg_test_score,
                'avg_train_score': avg_train_score}

-    print('cv test score {:.3f}'.format(avg_test_score))
-    print('cv train score {:.3f}'.format(avg_train_score))
+    print('cv avg test score {:.3f}'.format(avg_test_score))
+    print('cv avg train score {:.3f}'.format(avg_train_score))

     if experiment:
         experiment.log_metric("avg test score", avg_test_score)
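To close the loop, a sketch of running cross validation with logging enabled; the second positional argument and the remaining defaults are assumptions based on the docstring above:

from tangram import utils as tu  # assumed import path

cv_dict = tu.cross_val(
    ad_sc, ad_sp,           # spatial AnnData as second positional argument (assumed)
    mode='loo',
    verbose=True,           # prints per-fold train/test scores
    experiment=experiment,  # logs per-fold and average scores to comet-ml
)

# 'avg_test_score' and 'avg_train_score' are the keys built into cv_dict above.
print(cv_dict['avg_test_score'], cv_dict['avg_train_score'])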
3 changes: 1 addition & 2 deletions tests/tangram_test.py
@@ -81,7 +81,7 @@ def test_map_cells_to_space(ad_sc, ad_sp, mode, cluster_label, lambda_g1, lambda
                           scale=scale,
                           random_state=42,
                           num_epochs=500,
-                          verbose=False)
+                          verbose=True)

     # check if the first element of ad_map.X is equal to the expected value
     assert round(ad_map.X[0,0], 5) == round(e, 5)
@@ -155,4 +155,3 @@ def test_train_score_match(ad_sc, ad_sp, mode, cluster_label, lambda_



-