Skip to content

Commit

Permalink
Performance testing (#58)
Browse files Browse the repository at this point in the history
* performance tracking  multiple datasets and multiple runs
  • Loading branch information
njbernstein authored Feb 11, 2021
1 parent c584310 commit 16437fd
Show file tree
Hide file tree
Showing 12 changed files with 127 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.h5ad filter=lfs diff=lfs merge=lfs -text
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@ solo.egg-info*
solo/__pycache_*
.ipynb_checkpoints
*/.ipynb_checkpoints
slurm-*
testdata/results*
logs/
Binary file removed testdata/1a.h5ad
Binary file not shown.
Binary file modified testdata/2c.h5ad
Binary file not shown.
71 changes: 71 additions & 0 deletions testdata/calculate_performance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/usr/bin/env python

import anndata
import numpy as np
from sklearn.metrics import average_precision_score, roc_auc_score
from scipy.stats import mannwhitneyu
import matplotlib.pyplot as plt
import datetime
import pandas as pd
from glob import glob
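'''
Calculate doublet-detection performance (average precision and AUROC) for every
results_<experiment>_<run> directory, append the scores to
tracking_performance.csv, plot each experiment's history, and report whether the
most recent batch of runs differs significantly from the previous one.
'''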

###############################################################################
# main
###############################################################################

# Maps the experiment name embedded in a results directory name
# (results_<experiment>_<run>) to the dataset file whose `doublet_bool`
# labels are used to score that experiment.
experiment_name_to_dataset = dict(
    pbmc='2c.h5ad',
    kidney='gene_ad_filtered_PoolB4FACs_L4_Rep1.h5ad',
)

def main():
    """Score new solo runs, log them, and report how performance has moved.

    Steps:
      1. Score every ``results_<experiment>_<run>/softmax_scores.npy`` found
         in the current directory against the ``doublet_bool`` labels of the
         matching dataset and append the metrics to
         ``tracking_performance.csv``.
      2. For each experiment in the tracking file, save a plot of average
         precision and AUROC over time.
      3. Compare the two most recent batches of runs with a two-sided
         Mann-Whitney U test and print whether each metric moved
         significantly.
    """
    tracking_file = 'tracking_performance.csv'
    _append_new_results(tracking_file)

    performance_tracking = pd.read_csv(tracking_file)
    performance_tracking['date (dt)'] = pd.to_datetime(
        performance_tracking['date'], format="%Y-%m-%d %H")
    for experiment_name, group in performance_tracking.groupby('experiment_name'):
        _plot_history(experiment_name, group)
        _report_latest_change(experiment_name, group)


def _append_new_results(tracking_file):
    """Score each results directory and append one CSV row per run."""
    import os  # local import: only needed for path handling in this helper

    # Stamp once so every run scored by this invocation shares a single
    # "date" value. The later comparison groups runs by this value, and a
    # per-file timestamp could split one batch in two across an hour boundary.
    run_stamp = datetime.datetime.now().strftime("%Y-%m-%d %H")
    labels_by_experiment = {}  # each dataset only needs to be read once
    rows = []
    for result in sorted(glob('results_*/softmax_scores.npy')):
        # Directory names look like results_<experiment>_<run>.
        name_parts = os.path.basename(os.path.dirname(result)).split("_")
        if len(name_parts) < 3:
            print(f'Skipping {result}: expected results_<experiment>_<run>/')
            continue
        experiment_name, experiment_number = name_parts[1], name_parts[2]

        if experiment_name not in labels_by_experiment:
            adata = anndata.read(experiment_name_to_dataset[experiment_name])
            labels_by_experiment[experiment_name] = adata.obs.doublet_bool
        true_labels = labels_by_experiment[experiment_name]

        scores = np.load(result)
        apr = average_precision_score(true_labels, scores)
        auc = roc_auc_score(true_labels, scores)
        rows.append(
            f'{run_stamp},{experiment_name},{experiment_number},{apr},{auc}\n')

    # A brand-new (or empty) tracking file needs the header row, otherwise
    # the column lookups after pd.read_csv would fail.
    needs_header = (not os.path.exists(tracking_file)
                    or os.path.getsize(tracking_file) == 0)
    with open(tracking_file, 'a') as file:
        if needs_header:
            file.write(
                'date,experiment_name,experiment_number,average_precision,AUROC\n')
        file.writelines(rows)


def _plot_history(experiment_name, group):
    """Save a two-panel plot (average precision, AUROC) for one experiment."""
    fig, axes = plt.subplots(2, 1, figsize=(10, 20))
    panels = (('average_precision', 'average precision'), ('AUROC', 'AUROC'))
    for ax, (column, label) in zip(axes, panels):
        ax.plot(group['date'], group[column], '.')
        ax.set_xlabel('date')
        ax.set_ylabel(label)
    fig.savefig(f'{experiment_name}_performance_tracking.png')
    # Release the figure; otherwise one stays open per experiment.
    plt.close(fig)


def _report_latest_change(experiment_name, group, alpha=.05):
    """Print how the newest batch of runs compares with the previous batch."""
    run_dates = group['date (dt)'].drop_duplicates().sort_values()
    if len(run_dates) < 2:
        # Nothing to compare against yet (first batch for this experiment).
        print(f'Only one batch of runs recorded for {experiment_name}; '
              'skipping comparison.')
        return
    second_to_last, most_recent = run_dates.iloc[-2:]
    second_to_last_df = group[group['date (dt)'] == second_to_last]
    most_recent_df = group[group['date (dt)'] == most_recent]

    for metric in ['AUROC', 'average_precision']:
        mean_change = most_recent_df[metric].mean() - second_to_last_df[metric].mean()
        # State the alternative explicitly: older SciPy releases defaulted to
        # a one-sided p-value, which would halve the value tested here.
        pvalue = mannwhitneyu(most_recent_df[metric], second_to_last_df[metric],
                              alternative='two-sided').pvalue
        print(f'Mean {metric} for {experiment_name} has changed by: {mean_change}')
        print(f'P value for metric change {metric} in experiment {experiment_name}: {pvalue}')
        if pvalue >= alpha:
            continue
        if mean_change < 0:
            for _ in range(5):
                print('WARNING!')
            print(f'WARNING {metric} HAS GOTTEN SIGNIFICANTLY WORSE for {experiment_name}!')
        elif mean_change > 0:
            for _ in range(5):
                print('NICE JOB!')
            print(f'NICE JOB {metric} HAS GOTTEN SIGNIFICANTLY BETTER for {experiment_name}!')


###############################################################################
# __main__
###############################################################################

# Run the performance calculation only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
3 changes: 3 additions & 0 deletions testdata/gene_ad_filtered_PoolB4FACs_L4_Rep1.h5ad
Git LFS file not shown
Binary file added testdata/kidney_performance_tracking.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added testdata/pbmc_performance_tracking.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
17 changes: 17 additions & 0 deletions testdata/performance_test_kidney_PoolB4FACs_L4_Rep1.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
#SBATCH -p gpu
#SBATCH -n 1
#SBATCH --gres=gpu:gtx1080ti:1
#SBATCH --mem 120000
#SBATCH --time 8:00:00
#SBATCH -J solo_permformace_test
#SBATCH --array=1-6
#SBATCH --ntasks-per-node=1
#SBATCH -o logs/solo_performance_%A_%a.out
#SBATCH -e logs/solo_performance_%A_%a.err

# SLURM array job: runs solo on the kidney dataset once per array task (1-6),
# writing each run to its own results_kidney_<task id> directory.
# The interpreter is bash rather than sh because `source` is not a POSIX sh
# builtin and fails on systems where /bin/sh is not bash.
# NOTE(review): the logs/ directory probably has to exist before submission
# for the -o/-e files above to be written -- confirm on your cluster.

echo "My SLURM_ARRAY_TASK_ID: " "$SLURM_ARRAY_TASK_ID"
echo 'kidney'
# Stop here if the environment cannot be activated, so solo is never run
# from the wrong environment.
source activate solo-sc || { echo "could not activate solo-sc" >&2; exit 1; }
solo -g -r 2 -d 2 -t sum -o results_kidney_"$SLURM_ARRAY_TASK_ID" ../solo_params_example.json gene_ad_filtered_PoolB4FACs_L4_Rep1.h5ad

17 changes: 17 additions & 0 deletions testdata/performance_test_pbmc_2c.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
#SBATCH -p gpu
#SBATCH -n 1
#SBATCH --gres=gpu:gtx1080ti:1
#SBATCH --mem 120000
#SBATCH --time 8:00:00
#SBATCH -J solo_permformace_test
#SBATCH --array=1-6
#SBATCH --ntasks-per-node=1
#SBATCH -o logs/solo_performance_%A_%a.out
#SBATCH -e logs/solo_performance_%A_%a.err

# SLURM array job: runs solo on the pbmc dataset once per array task (1-6),
# writing each run to its own results_pbmc_<task id> directory.
# The interpreter is bash rather than sh because `source` is not a POSIX sh
# builtin and fails on systems where /bin/sh is not bash.
# NOTE(review): the logs/ directory probably has to exist before submission
# for the -o/-e files above to be written -- confirm on your cluster.

echo "My SLURM_ARRAY_TASK_ID: " "$SLURM_ARRAY_TASK_ID"
echo 'pbmc'
# Stop here if the environment cannot be activated, so solo is never run
# from the wrong environment.
source activate solo-sc || { echo "could not activate solo-sc" >&2; exit 1; }
solo -g -r 2 -d 2 -t sum -o results_pbmc_"$SLURM_ARRAY_TASK_ID" ../solo_params_example.json 2c.h5ad

Binary file added testdata/performance_tracking.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
15 changes: 15 additions & 0 deletions testdata/tracking_performance.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
date,experiment_name,experiment_number,average_precision,AUROC
2020-02-11 10,pbmc,0,0.642,0.941
2020-02-11 10,kidney,0,0.652,0.756
2021-02-11 11,kidney,6,0.6556529157908413,0.7708304010670927
2021-02-11 11,kidney,1,0.6607756974710838,0.7726586445084818
2021-02-11 11,pbmc,4,0.6591329163786043,0.9271364908721599
2021-02-11 11,kidney,5,0.6494058766966985,0.7658514072862478
2021-02-11 11,kidney,2,0.6618481153467091,0.7730679879883625
2021-02-11 11,pbmc,3,0.6561663933351891,0.9281592671986435
2021-02-11 11,pbmc,1,0.6623743880515199,0.9226130098544348
2021-02-11 11,pbmc,6,0.6277683225071036,0.9207927916996268
2021-02-11 11,pbmc,2,0.6502136907750218,0.9275159493211569
2021-02-11 11,kidney,3,0.6467093639812479,0.7573174534495049
2021-02-11 11,kidney,4,0.6586798480352564,0.7664625647110882
2021-02-11 11,pbmc,5,0.6478040939493424,0.9225424292745276

0 comments on commit 16437fd

Please sign in to comment.