From 66fd0c3aaeac40e449621022dc8ded8a994bb4cc Mon Sep 17 00:00:00 2001 From: lruizcalico Date: Tue, 23 Apr 2024 20:32:02 -0700 Subject: [PATCH 1/3] add tensorboard --- src/westminster/scripts/westminster_classify.py | 9 +++++---- src/westminster/scripts/westminster_gtex_coef.py | 15 ++++++++++----- src/westminster/scripts/westminster_gtex_folds.py | 12 ++++++------ .../scripts/westminster_train_composer.py | 14 +++++++++++++- 4 files changed, 34 insertions(+), 16 deletions(-) diff --git a/src/westminster/scripts/westminster_classify.py b/src/westminster/scripts/westminster_classify.py index db1b2cd..5f5b215 100755 --- a/src/westminster/scripts/westminster_classify.py +++ b/src/westminster/scripts/westminster_classify.py @@ -35,11 +35,12 @@ def main(): help="Take features absolute value [Default: %default]", ) parser.add_option( - '-f', - dest='num_folds', + "-f", + dest="num_folds", default=8, - type='int', - help='Cross-validation folds [Default: %default]') + type="int", + help="Cross-validation folds [Default: %default]", + ) parser.add_option( "-i", dest="iterations", diff --git a/src/westminster/scripts/westminster_gtex_coef.py b/src/westminster/scripts/westminster_gtex_coef.py index f9f791b..d83c0a0 100755 --- a/src/westminster/scripts/westminster_gtex_coef.py +++ b/src/westminster/scripts/westminster_gtex_coef.py @@ -39,11 +39,12 @@ def main(): help="GTEx VCF directory", ) parser.add_argument( - '-m', - '--min_variants', + "-m", + "--min_variants", default=32, type=int, - help='Minimum number of variants for tissue to be included') + help="Minimum number of variants for tissue to be included", + ) parser.add_argument( "-p", "--plot", action="store_true", help="Generate tissue prediction plots" ) @@ -104,11 +105,15 @@ def main(): gtex_scores_file = f"{args.gtex_dir}/{tissue}_pos/scores.h5" try: variant_scores = read_scores( - gtex_scores_file, keyword, eqtl_df, args.snp_stat, verbose=args.verbose + gtex_scores_file, + keyword, + eqtl_df, + args.snp_stat, + verbose=args.verbose, ) variant_scores = variant_scores[eqtl_df.consistent] except TypeError: - print(f'Tracks matching {tissue} are missing', file=sys.stderr) + print(f"Tracks matching {tissue} are missing", file=sys.stderr) continue # compute sign AUROCs diff --git a/src/westminster/scripts/westminster_gtex_folds.py b/src/westminster/scripts/westminster_gtex_folds.py index aa59dad..d03742a 100755 --- a/src/westminster/scripts/westminster_gtex_folds.py +++ b/src/westminster/scripts/westminster_gtex_folds.py @@ -56,11 +56,11 @@ def main(): help="Genome FASTA for sequences [Default: %default]", ) snp_options.add_option( - '--indel_stitch', - dest='indel_stitch', + "--indel_stitch", + dest="indel_stitch", default=True, - action='store_true', - help="Stitch indel compensation shifts [Default: %default]" + action="store_true", + help="Stitch indel compensation shifts [Default: %default]", ) snp_options.add_option( "-o", @@ -406,10 +406,10 @@ def main(): # fit classifiers # SNPs - cmd_base = 'westminster_classify.py -f 8 -i 100 -r 44 -s' + cmd_base = "westminster_classify.py -f 8 -i 100 -r 44 -s" # indels # cmd_base = 'westminster_classify.py -f 6 -i 64 -r 44 -s' - cmd_base += ' --msl %d' % options.msl + cmd_base += " --msl %d" % options.msl if options.class_targets_file is not None: cmd_base += " -t %s" % options.class_targets_file diff --git a/src/westminster/scripts/westminster_train_composer.py b/src/westminster/scripts/westminster_train_composer.py index d4569cf..46624a9 100755 --- a/src/westminster/scripts/westminster_train_composer.py +++ b/src/westminster/scripts/westminster_train_composer.py @@ -60,6 +60,12 @@ def main(): default="train_out", help="Training output directory [Default: %default]", ) + train_options.add_option( + "-log_dir", + dest="log_dir", + default="log_out", + help="Tensorboard log directory [Default: %default]", + ) train_options.add_option( "--restore", dest="restore", @@ -184,6 +190,7 @@ def main(): print("Output directory %s exists. Please remove." % options.out_dir) exit(1) os.makedirs(options.out_dir, exist_ok=True) + os.makedirs(options.log_dir, exist_ok=True) # read model parameters with open(params_file) as params_open: @@ -229,6 +236,7 @@ def main(): for ci in range(options.crosses): for fi in range(num_folds): rep_dir = "%s/f%dc%d" % (options.out_dir, fi, ci) + rep_log_dir = "%s/f%dc%d" % (options.log_dir, fi, ci) train_dir = "%s/train" % rep_dir if options.restart and not options.checkpoint and os.path.isdir(train_dir): @@ -459,7 +467,7 @@ def make_rep_data(data_dir, rep_data_dir, fi, ci): ti += 1 -def options_string(options, train_options, rep_dir): +def options_string(options, train_options, rep_dir, rep_log_dir): options_str = "" for opt in train_options.option_list: @@ -485,6 +493,10 @@ def options_string(options, train_options, rep_dir): elif opt.dest == "out_dir": opt_value = "%s/train" % rep_dir + # modify + elif opt.dest == "log_dir": + opt_value = "%s/log" % rep_log_dir + # find matching restore elif opt.dest == "restore": fold_dir_mid = rep_dir.split("/")[-1] From 38faeb7bd19107ecdb809619031c265b6c2a67c7 Mon Sep 17 00:00:00 2001 From: lruizcalico Date: Tue, 23 Apr 2024 20:35:49 -0700 Subject: [PATCH 2/3] fix options --- src/westminster/scripts/westminster_train_composer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/westminster/scripts/westminster_train_composer.py b/src/westminster/scripts/westminster_train_composer.py index 46624a9..b6aa246 100755 --- a/src/westminster/scripts/westminster_train_composer.py +++ b/src/westminster/scripts/westminster_train_composer.py @@ -61,7 +61,7 @@ def main(): help="Training output directory [Default: %default]", ) train_options.add_option( - "-log_dir", + "-l", dest="log_dir", default="log_out", help="Tensorboard log directory [Default: %default]", From 1cff0e12bb94544b83afe47dacf25378416aabdf Mon Sep 17 00:00:00 2001 From: lruizcalico Date: Tue, 23 Apr 2024 20:36:41 -0700 Subject: [PATCH 3/3] fix options --- src/westminster/scripts/westminster_train_composer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/westminster/scripts/westminster_train_composer.py b/src/westminster/scripts/westminster_train_composer.py index b6aa246..4986996 100755 --- a/src/westminster/scripts/westminster_train_composer.py +++ b/src/westminster/scripts/westminster_train_composer.py @@ -250,7 +250,7 @@ def main(): # train command cmd = "python3 -m baskerville.scripts.hound_train" - cmd += " %s" % options_string(options, train_options, rep_dir) + cmd += " %s" % options_string(options, train_options, rep_dir, rep_log_dir) cmd += " %s %s" % (params_file, " ".join(rep_data_dirs)) train_jobs.append(cmd)