diff --git a/src/westminster/scripts/westminster_classify.py b/src/westminster/scripts/westminster_classify.py index db1b2cd..5f5b215 100755 --- a/src/westminster/scripts/westminster_classify.py +++ b/src/westminster/scripts/westminster_classify.py @@ -35,11 +35,12 @@ def main(): help="Take features absolute value [Default: %default]", ) parser.add_option( - '-f', - dest='num_folds', + "-f", + dest="num_folds", default=8, - type='int', - help='Cross-validation folds [Default: %default]') + type="int", + help="Cross-validation folds [Default: %default]", + ) parser.add_option( "-i", dest="iterations", diff --git a/src/westminster/scripts/westminster_gtex_coef.py b/src/westminster/scripts/westminster_gtex_coef.py index f9f791b..d83c0a0 100755 --- a/src/westminster/scripts/westminster_gtex_coef.py +++ b/src/westminster/scripts/westminster_gtex_coef.py @@ -39,11 +39,12 @@ def main(): help="GTEx VCF directory", ) parser.add_argument( - '-m', - '--min_variants', + "-m", + "--min_variants", default=32, type=int, - help='Minimum number of variants for tissue to be included') + help="Minimum number of variants for tissue to be included", + ) parser.add_argument( "-p", "--plot", action="store_true", help="Generate tissue prediction plots" ) @@ -104,11 +105,15 @@ def main(): gtex_scores_file = f"{args.gtex_dir}/{tissue}_pos/scores.h5" try: variant_scores = read_scores( - gtex_scores_file, keyword, eqtl_df, args.snp_stat, verbose=args.verbose + gtex_scores_file, + keyword, + eqtl_df, + args.snp_stat, + verbose=args.verbose, ) variant_scores = variant_scores[eqtl_df.consistent] except TypeError: - print(f'Tracks matching {tissue} are missing', file=sys.stderr) + print(f"Tracks matching {tissue} are missing", file=sys.stderr) continue # compute sign AUROCs diff --git a/src/westminster/scripts/westminster_train_composer.py b/src/westminster/scripts/westminster_train_composer.py index d4569cf..4986996 100755 --- a/src/westminster/scripts/westminster_train_composer.py +++ b/src/westminster/scripts/westminster_train_composer.py @@ -60,6 +60,12 @@ def main(): default="train_out", help="Training output directory [Default: %default]", ) + train_options.add_option( + "-l", + dest="log_dir", + default="log_out", + help="Tensorboard log directory [Default: %default]", + ) train_options.add_option( "--restore", dest="restore", @@ -184,6 +190,7 @@ def main(): print("Output directory %s exists. Please remove." % options.out_dir) exit(1) os.makedirs(options.out_dir, exist_ok=True) + os.makedirs(options.log_dir, exist_ok=True) # read model parameters with open(params_file) as params_open: @@ -229,6 +236,7 @@ def main(): for ci in range(options.crosses): for fi in range(num_folds): rep_dir = "%s/f%dc%d" % (options.out_dir, fi, ci) + rep_log_dir = "%s/f%dc%d" % (options.log_dir, fi, ci) train_dir = "%s/train" % rep_dir if options.restart and not options.checkpoint and os.path.isdir(train_dir): @@ -242,7 +250,7 @@ def main(): # train command cmd = "python3 -m baskerville.scripts.hound_train" - cmd += " %s" % options_string(options, train_options, rep_dir) + cmd += " %s" % options_string(options, train_options, rep_dir, rep_log_dir) cmd += " %s %s" % (params_file, " ".join(rep_data_dirs)) train_jobs.append(cmd) @@ -459,7 +467,7 @@ def make_rep_data(data_dir, rep_data_dir, fi, ci): ti += 1 -def options_string(options, train_options, rep_dir): +def options_string(options, train_options, rep_dir, rep_log_dir): options_str = "" for opt in train_options.option_list: @@ -485,6 +493,10 @@ def options_string(options, train_options, rep_dir): elif opt.dest == "out_dir": opt_value = "%s/train" % rep_dir + # modify + elif opt.dest == "log_dir": + opt_value = "%s/log" % rep_log_dir + # find matching restore elif opt.dest == "restore": fold_dir_mid = rep_dir.split("/")[-1]