diff --git a/src/baskerville/HY_helper.py b/src/baskerville/HY_helper.py
index f4f7878..2d8b665 100644
--- a/src/baskerville/HY_helper.py
+++ b/src/baskerville/HY_helper.py
@@ -3,6 +3,8 @@
 import pysam
 import pyBigWig
+
+
 def make_seq_1hot(genome_open, chrm, start, end, seq_len):
     if start < 0:
         seq_dna = 'N'*(-start) + genome_open.fetch(chrm, 0, end)
diff --git a/src/baskerville/layers.py b/src/baskerville/layers.py
index 6996ce1..8f6af73 100644
--- a/src/baskerville/layers.py
+++ b/src/baskerville/layers.py
@@ -748,11 +748,14 @@ def get_config(self):
 class SqueezeExcite(tf.keras.layers.Layer):
     def __init__(
         self,
         activation="relu",
         additive=False,
         bottleneck_ratio=8,
         norm_type=None,
         bn_momentum=0.9,
+        use_bias=True,
+        kernel_initializer="glorot_uniform",
+        bias_initializer="zeros",
     ):
         super(SqueezeExcite, self).__init__()
         self.activation = activation
@@ -760,6 +763,9 @@ def __init__(
         self.norm_type = norm_type
         self.bn_momentum = bn_momentum
         self.bottleneck_ratio = bottleneck_ratio
+        self.kernel_initializer = kernel_initializer
+        self.bias_initializer = bias_initializer
+        self.use_bias = use_bias
 
     def build(self, input_shape):
         self.num_channels = input_shape[-1]
@@ -778,26 +784,24 @@ def build(self, input_shape):
             exit(1)
 
         self.dense1 = tf.keras.layers.Dense(
-            units=self.num_channels // self.bottleneck_ratio, activation="relu"
+            units=self.num_channels // self.bottleneck_ratio,
+            activation="relu",
+            use_bias=self.use_bias,
+            kernel_initializer=self.kernel_initializer,
+            bias_initializer=self.bias_initializer,
         )
-        self.dense2 = tf.keras.layers.Dense(units=self.num_channels, activation=None)
-
-        # normalize
-        # if self.norm_type == 'batch-sync':
-        #     self.norm = tf.keras.layers.experimental.SyncBatchNormalization(
-        #         momentum=self.bn_momentum, gamma_initializer='zeros')
-        # elif self.norm_type == 'batch':
-        #     self.norm = tf.keras.layers.BatchNormalization(
-        #         momentum=self.bn_momentum, gamma_initializer='zeros')
-        # elif self.norm_type == 'layer':
-        #     self.norm = tf.keras.layers.LayerNormalization(
-        #         gamma_initializer='zeros')
-        # else:
-        #     self.norm = None
+
+        self.dense2 = tf.keras.layers.Dense(
+            units=self.num_channels,
+            use_bias=self.use_bias,
+            kernel_initializer=self.kernel_initializer,
+            bias_initializer=self.bias_initializer,
+            activation=None)
 
     def call(self, x):
         # activate
-        x = activate(x, self.activation)
+        if self.activation is not None:
+            x = activate(x, self.activation)
 
         # squeeze
         squeeze = self.gap(x)
@@ -805,8 +809,6 @@ def call(self, x):
         # excite
         excite = self.dense1(squeeze)
         excite = self.dense2(excite)
-        # if self.norm is not None:
-        #     excite = self.norm(excite)
 
         # scale
         if self.one_or_two == "one":
@@ -828,14 +830,15 @@ def get_config(self):
             {
                 "activation": self.activation,
                 "additive": self.additive,
+                "use_bias": self.use_bias,
                 "norm_type": self.norm_type,
                 "bn_momentum": self.bn_momentum,
                 "bottleneck_ratio": self.bottleneck_ratio,
+                "bottleneck_size": self.num_channels // self.bottleneck_ratio,
             }
         )
         return config
 
-
 class GlobalContext(tf.keras.layers.Layer):
     def __init__(self):
         super(GlobalContext, self).__init__()
diff --git a/src/baskerville/scripts/hound_transfer.py b/src/baskerville/scripts/hound_transfer.py
index 36dbab8..0af7997 100755
--- a/src/baskerville/scripts/hound_transfer.py
+++ b/src/baskerville/scripts/hound_transfer.py
@@ -28,6 +28,7 @@
 from baskerville import seqnn
 from baskerville import trainer
 from baskerville import layers
+from baskerville import transfer_helper
 
 """
 hound_transfer.py
@@ -72,13 +73,32 @@ def main():
    parser.add_argument(
         "--transfer_mode",
         default="full",
-        help="transfer method. [full, linear, adapterHoulsby, lora, lora_full, ia3]",
+        help="transfer method. [full, linear, sparse]",
     )
     parser.add_argument(
-        "--latent",
+        "--att_adapter",
+        default=None,
+        type=str,
+        help="attention layer module [adapterHoulsby, lora, lora_full, ia3]",
+    )
+    parser.add_argument(
+        "--att_latent",
         type=int,
         default=16,
-        help="adapter latent size.",
+        help="attention adapter latent size.",
+    )
+    parser.add_argument(
+        "--conv_adapter",
+        default=None,
+        type=str,
+        help="conv layer module [conv, conv_all, batch_norm, se, se_bn, se_all, se_all_bn]",
+    )
+
+    parser.add_argument(
+        "--se_ratio",
+        type=int,
+        default=16,
+        help="se bottleneck ratio.",
     )
     parser.add_argument(
         "--tfr_train",
@@ -105,6 +125,9 @@ def main():
     if args.params_file != "%s/params.json" % args.out_dir:
         shutil.copy(args.params_file, "%s/params.json" % args.out_dir)
 
+    if args.transfer_mode not in ['full', 'linear', 'sparse']:
+        raise ValueError("transfer mode must be one of full, linear, sparse")
+
     # read model parameters
     with open(args.params_file) as params_open:
         params = json.load(params_open)
@@ -156,48 +179,112 @@ def main():
         # one GPU
 
         # initialize model
+        params_model['verbose'] = False
         seqnn_model = seqnn.SeqNN(params_model)
 
         # restore
         if args.restore:
             seqnn_model.restore(args.restore, trunk=args.trunk)
 
-        # transfer learning strategies
+        # head params
+        print('params in new head: %d' % transfer_helper.param_count(seqnn_model.model.layers[-2]))
+
+        ####################
+        # transfer options #
+        ####################
         if args.transfer_mode=='full':
             seqnn_model.model.trainable=True
 
-        elif args.transfer_mode=='batch_norm':
-            seqnn_model.model_trunk.trainable=False
-            for l in seqnn_model.model.layers:
-                if l.name.startswith("batch_normalization"):
-                    l.trainable=True
-            seqnn_model.model.summary()
-
         elif args.transfer_mode=='linear':
             seqnn_model.model_trunk.trainable=False
-            seqnn_model.model.summary()
-
-        elif args.transfer_mode=='adapterHoulsby':
-            seqnn_model.model_trunk.trainable=False
-            strand_pair = strand_pairs[0]
-            adapter_model = make_adapter_model(seqnn_model.model, strand_pair, args.latent)
-            seqnn_model.model = adapter_model
-            seqnn_model.models[0] = seqnn_model.model
-            seqnn_model.model_trunk = None
-            seqnn_model.model.summary()
-
-        elif args.transfer_mode=='lora':
-            add_lora(seqnn_model.model, rank=args.latent, mode='default')
-            seqnn_model.model.summary()
-
-        elif args.transfer_mode=='lora_full':
-            add_lora(seqnn_model.model, rank=args.latent, mode='full')
-            seqnn_model.model.summary()
-
-        elif args.transfer_mode=='ia3':
-            add_ia3(seqnn_model.model)
-            seqnn_model.model.summary()
-
+
+        ############
+        # adapters #
+        ############
+        elif args.transfer_mode=='sparse':
+
+            # attention adapter
+            if args.att_adapter is not None:
+                if args.att_adapter=='adapterHoulsby':
+                    seqnn_model.model = transfer_helper.add_houlsby(seqnn_model.model,
+                                                                    strand_pairs[0],
+                                                                    latent_size=args.att_latent)
+                elif args.att_adapter=='lora':
+                    transfer_helper.add_lora(seqnn_model.model,
+                                             rank=args.att_latent,
+                                             mode='default')
+
+                elif args.att_adapter=='lora_full':
+                    transfer_helper.add_lora(seqnn_model.model,
+                                             rank=args.att_latent,
+                                             mode='full')
+
+                elif args.att_adapter=='ia3':
+                    transfer_helper.add_ia3(seqnn_model.model)
+
+            # conv adapter
+            # assumes seqnn_model has already been appropriately frozen
+            if args.conv_adapter is not None:
+                if args.conv_adapter=='conv':
+                    params_added = 0
+                    for l in seqnn_model.model.layers:
+                        if l.name.startswith("conv1d"):
+                            l.trainable=True
+                            params_added += transfer_helper.param_count(l, type='trainable')
+                    print('params added/unfrozen by conv: %d' % params_added)
+
+                elif args.conv_adapter=='conv_all':
+                    params_added = 0
+                    for l in seqnn_model.model.layers:
+                        if l.name.startswith(("conv1d","separable_conv1d")):
+                            l.trainable=True
+                            params_added += transfer_helper.param_count(l, type='trainable')
+                    print('params added/unfrozen by conv_all: %d' % params_added)
+
+                elif args.conv_adapter=='batch_norm':
+                    params_added = 0
+                    for l in seqnn_model.model.layers:
+                        if l.name.startswith("batch_normalization"):
+                            l.trainable=True
+                            params_added += transfer_helper.param_count(l, type='trainable')
+                    print('params added/unfrozen by batch_norm: %d' % params_added)
+
+                ##################
+                # squeeze-excite #
+                ##################
+                elif args.conv_adapter=='se':
+                    seqnn_model.model = transfer_helper.add_se(seqnn_model.model,
+                                                               strand_pair=strand_pairs[0],
+                                                               bottleneck_ratio=args.se_ratio,
+                                                               insert_mode='pre_att',
+                                                               unfreeze_bn=False)
+
+                elif args.conv_adapter=='se_bn':
+                    seqnn_model.model = transfer_helper.add_se(seqnn_model.model,
+                                                               strand_pair=strand_pairs[0],
+                                                               bottleneck_ratio=args.se_ratio,
+                                                               insert_mode='pre_att',
+                                                               unfreeze_bn=True)
+
+                elif args.conv_adapter=='se_all':
+                    seqnn_model.model = transfer_helper.add_se(seqnn_model.model,
+                                                               strand_pair=strand_pairs[0],
+                                                               bottleneck_ratio=args.se_ratio,
+                                                               insert_mode='all',
+                                                               unfreeze_bn=False)
+
+                elif args.conv_adapter=='se_all_bn':
+                    seqnn_model.model = transfer_helper.add_se(seqnn_model.model,
+                                                               strand_pair=strand_pairs[0],
+                                                               bottleneck_ratio=args.se_ratio,
+                                                               insert_mode='all',
+                                                               unfreeze_bn=True)
+
+        #################
+        # final summary #
+        #################
+        seqnn_model.model.summary()
+
         # initialize trainer
         seqnn_trainer = trainer.Trainer(
             params_train, train_data, eval_data, args.out_dir
@@ -214,6 +301,41 @@ def main():
             seqnn_trainer.fit_tape(seqnn_model)
         else:
             seqnn_trainer.fit2(seqnn_model)
+
+        #############################
+        # post-training adjustments #
+        #############################
+        if args.transfer_mode=='sparse':
+
+            # Houlsby adapters change the architecture, so overwrite params.json with an updated one
+            if args.att_adapter=='adapterHoulsby':
+                transfer_helper.modify_json(input_json=args.params_file,
+                                            output_json=args.out_dir,
+                                            adapter='houlsby',
+                                            latent_size=args.att_latent)
+
+            # merge lora weights into the original layers, save weights to: model_best.mergeW.h5
+            # use original params.json
+            if args.att_adapter=='lora':
+                seqnn_model.model.load_weights('%s/model_best.h5' % args.out_dir)
+                transfer_helper.merge_lora(seqnn_model.model, mode='default')
+                seqnn_model.save('%s/model_best.mergeW.h5' % args.out_dir)
+                transfer_helper.var_reorder('%s/model_best.mergeW.h5' % args.out_dir)
+
+            if args.att_adapter=='lora_full':
+                seqnn_model.model.load_weights('%s/model_best.h5' % args.out_dir)
+                transfer_helper.merge_lora(seqnn_model.model, mode='full')
+                seqnn_model.save('%s/model_best.mergeW.h5' % args.out_dir)
+                transfer_helper.var_reorder('%s/model_best.mergeW.h5' % args.out_dir)
+
+            # merge ia3 weights into the original layers, save weights to: model_best.mergeW.h5
+            if args.att_adapter=='ia3':
+                seqnn_model.model.load_weights('%s/model_best.h5' % args.out_dir)
+                transfer_helper.merge_ia3(seqnn_model.model)
+                seqnn_model.save('%s/model_best.mergeW.h5' % args.out_dir)
+                transfer_helper.var_reorder('%s/model_best.mergeW.h5' % args.out_dir)
+
+
     else:
 
         ########################################
@@ -259,157 +381,6 @@ def main():
         else:
             seqnn_trainer.fit2(seqnn_model)
 
-def make_adapter_model(input_model, strand_pair, latent_size=16):
-    # take seqnn_model
as input - # output a new seqnn_model object - # only the adapter, and layer_norm are trainable - - model = tf.keras.Model(inputs=input_model.input, - outputs=input_model.layers[-2].output) # remove the switch_reverse layer - - # save current graph - layer_parent_dict_old = {} # the parent layers of each layer in the old graph - for layer in model.layers: - for node in layer._outbound_nodes: - layer_name = node.outbound_layer.name - if layer_name not in layer_parent_dict_old: - layer_parent_dict_old.update({layer_name: [layer.name]}) - else: - if layer.name not in layer_parent_dict_old[layer_name]: - layer_parent_dict_old[layer_name].append(layer.name) - - layer_output_dict_new = {} # the output tensor of each layer in the new graph - layer_output_dict_new.update({model.layers[0].name: model.input}) - - # remove switch_reverse - to_fix = [i for i in layer_parent_dict_old if re.match('switch_reverse', i)] - for i in to_fix: - del layer_parent_dict_old[i] - - # Iterate over all layers after the input - model_outputs = [] - reverse_bool = None - - for layer in model.layers[1:]: - - # parent layers - parent_layers = layer_parent_dict_old[layer.name] - - # layer inputs - layer_input = [layer_output_dict_new[parent] for parent in parent_layers] - if len(layer_input) == 1: layer_input = layer_input[0] - - if re.match('stochastic_reverse_complement', layer.name): - x, reverse_bool = layer(layer_input) - - # insert adapter: - elif re.match('add', layer.name): - if any([re.match('dropout', i) for i in parent_layers]): - print('adapter added before:%s'%layer.name) - x = layers.AdapterHoulsby(latent_size=latent_size)(layer_input[1]) - x = layer([layer_input[0], x]) - else: - x = layer(layer_input) - - else: - x = layer(layer_input) - - # save the output tensor of every layer - layer_output_dict_new.update({layer.name: x}) - - final = layers.SwitchReverse(strand_pair)([layer_output_dict_new[model.layers[-1].name], reverse_bool]) - model_adapter = tf.keras.Model(inputs=model.inputs, outputs=final) - - # set layer_norm layers to trainable - for l in model_adapter.layers: - if re.match('layer_normalization', l.name): l.trainable = True - - return model_adapter - -def add_lora(input_model, rank=8, alpha=16, mode='default'): - ###################### - # inject lora layers # - ###################### - # take seqnn.model as input - # replace _q_layer, _v_layer in multihead_attention - # optionally replace _k_layer, _embedding_layer - if mode not in ['default','full']: - raise ValueError("mode must be default or full") - - for layer in input_model.layers: - if re.match('multihead_attention', layer.name): - # default loRA - layer._q_layer = layers.Lora(layer._q_layer, rank=rank, alpha=alpha, trainable=True) - layer._v_layer = layers.Lora(layer._v_layer, rank=rank, alpha=alpha, trainable=True) - # full loRA - if mode=='full': - layer._k_layer = layers.Lora(layer._k_layer, rank=rank, alpha=alpha, trainable=True) - layer._embedding_layer = layers.Lora(layer._embedding_layer, rank=rank, alpha=alpha, trainable=True) - - input_model(input_model.input) # initialize new variables - - ################# - # freeze params # - ################# - # freeze all params but lora - for layer in input_model._flatten_layers(): - lst_of_sublayers = list(layer._flatten_layers()) - if len(lst_of_sublayers) == 1: - if layer.name in ["lora_a", "lora_b"]: - layer.trainable = True - else: - layer.trainable = False - - ### bias terms need to be frozen separately - for layer in input_model.layers: - if re.match('multihead_attention', 
layer.name): - layer._r_w_bias = tf.Variable(layer._r_w_bias, trainable=False, name=layer._r_w_bias.name) - layer._r_r_bias = tf.Variable(layer._r_r_bias, trainable=False, name=layer._r_r_bias.name) - - # set final head to be trainable - input_model.layers[-2].trainable=True - - -def add_ia3(input_model): - ##################### - # inject ia3 layers # - ##################### - # take seqnn.model as input - # replace _k_layer, _v_layer, _embedding_layer in multihead_attention - for layer in input_model.layers: - if re.match('multihead_attention', layer.name): - layer._k_layer = layers.IA3(layer._k_layer, trainable=True) - layer._v_layer = layers.IA3(layer._v_layer, trainable=True) - layer._embedding_layer = layers.IA3(layer._embedding_layer, trainable=True) - input_model(input_model.input) # instantiate model to initialize new variables - - ################# - # freeze params # - ################# - # set ia3 to trainable - for layer in input_model._flatten_layers(): - lst_of_sublayers = list(layer._flatten_layers()) - if len(lst_of_sublayers) == 1: - if layer.name =='ia3': - layer.trainable = True - else: - layer.trainable = False - - ### bias terms need to be frozen separately - for layer in input_model.layers: - if re.match('multihead_attention', layer.name): - layer._r_w_bias = tf.Variable(layer._r_w_bias, trainable=False, name=layer._r_w_bias.name) - layer._r_r_bias = tf.Variable(layer._r_r_bias, trainable=False, name=layer._r_r_bias.name) - - # set final head to be trainable - input_model.layers[-2].trainable=True - -def param_count(model): - trainable = int(sum(tf.keras.backend.count_params(w) for w in model.trainable_weights)) - non_trainable = int(sum(tf.keras.backend.count_params(w) for w in model.non_trainable_weights)) - print('total params:%d' %(trainable + non_trainable)) - print('trainable params:%d' %trainable) - print('non-trainable params:%d' %non_trainable) ################################################################################ # __main__ diff --git a/src/baskerville/scripts/westminster_train_folds_copy.py b/src/baskerville/scripts/westminster_train_folds_copy.py deleted file mode 100755 index 777d784..0000000 --- a/src/baskerville/scripts/westminster_train_folds_copy.py +++ /dev/null @@ -1,530 +0,0 @@ -#!/usr/bin/env python -# Copyright 2019 Calico LLC - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at - -# https://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========================================================================= - -from optparse import OptionParser, OptionGroup -import glob -import json -import os -import pdb -import shutil - -from natsort import natsorted - -import slurm - -""" -westminster_train_folds.py - -Train baskerville model replicates on cross folds using given parameters and data. -""" - -################################################################################ -# main -################################################################################ -def main(): - usage = 'usage: %prog [options] ...' 
- parser = OptionParser(usage) - - # train - train_options = OptionGroup(parser, 'houndtrain.py options') - train_options.add_option('-k', dest='keras_fit', - default=False, action='store_true', - help='Train with Keras fit method [Default: %default]') - train_options.add_option('-m', dest='mixed_precision', - default=False, action='store_true', - help='Train with mixed precision [Default: %default]') - train_options.add_option('-o', dest='out_dir', - default='train_out', - help='Training output directory [Default: %default]') - train_options.add_option('--restore', dest='restore', - help='Restore model and continue training, from existing fold train dir [Default: %default]') - train_options.add_option('--trunk', dest='trunk', - default=False, action='store_true', - help='Restore only model trunk [Default: %default]') - train_options.add_option('--tfr_train', dest='tfr_train_pattern', - default=None, - help='Training TFR pattern string appended to data_dir/tfrecords for subsetting [Default: %default]') - train_options.add_option('--tfr_eval', dest='tfr_eval_pattern', - default=None, - help='Evaluation TFR pattern string appended to data_dir/tfrecords for subsetting [Default: %default]') - parser.add_option_group(train_options) - - # transfer options - transfer_options = OptionGroup(parser, 'transfer options') - transfer_options.add_option('--transfer', dest='transfer', - default=False, action='store_true', - help='whether to do transfer learning.') - transfer_options.add_option('--pretrain', dest='pretrain', - default=None, help='path to pretrained model trunk.') - transfer_options.add_option('--transfer_mode', dest='transfer_mode', - default='linear', help='transfer method.') - transfer_options.add_option('--latent', dest='latent', type='int', - default=0, help='latent size. 
') - - # eval - eval_options = OptionGroup(parser, 'hound_eval.py options') - eval_options.add_option('--rank', dest='rank_corr', - default=False, action='store_true', - help='Compute Spearman rank correlation [Default: %default]') - eval_options.add_option('--rc', dest='rc', - default=False, action='store_true', - help='Average forward and reverse complement predictions [Default: %default]') - eval_options.add_option('--shifts', dest='shifts', - default='0', type='str', - help='Ensemble prediction shifts [Default: %default]') - parser.add_option('--step', dest='step', - default=1, type='int', - help='Spatial step for specificity/spearmanr [Default: %default]') - parser.add_option_group(eval_options) - - # multi - rep_options = OptionGroup(parser, 'replication options') - rep_options.add_option('-c', dest='crosses', - default=1, type='int', - help='Number of cross-fold rounds [Default:%default]') - rep_options.add_option('--checkpoint', dest='checkpoint', - default=False, action='store_true', - help='Restart training from checkpoint [Default: %default]') - rep_options.add_option('-e', dest='conda_env', - default='tf2.12', - help='Anaconda environment [Default: %default]') - rep_options.add_option('-f', dest='fold_subset', - default=None, type='int', - help='Run a subset of folds [Default:%default]') - rep_options.add_option('--name', dest='name', - default='fold', help='SLURM name prefix [Default: %default]') - rep_options.add_option('-p', dest='processes', - default=None, type='int', - help='Number of processes, passed by multi script') - rep_options.add_option('-q', dest='queue', - default='titan_rtx', - help='SLURM queue on which to run the jobs [Default: %default]') - rep_options.add_option('-r', '--restart', dest='restart', - default=False, action='store_true') - rep_options.add_option('--setup', dest='setup', - default=False, action='store_true', - help='Setup folds data directory only [Default: %default]') - rep_options.add_option('--spec_off', dest='spec_off', - default=False, action='store_true') - rep_options.add_option('--eval_off', dest='eval_off', - default=False, action='store_true') - rep_options.add_option('--eval_train_off', dest='eval_train_off', - default=False, action='store_true') - parser.add_option_group(rep_options) - - (options, args) = parser.parse_args() - - if len(args) < 2: - parser.error('Must provide parameters and data directory.') - else: - params_file = os.path.abspath(args[0]) - data_dirs = [os.path.abspath(arg) for arg in args[1:]] - - ####################################################### - # prep work - - if not options.restart and os.path.isdir(options.out_dir): - print('Output directory %s exists. Please remove.' 
% options.out_dir) - exit(1) - os.makedirs(options.out_dir, exist_ok=True) - - # read model parameters - with open(params_file) as params_open: - params = json.load(params_open) - params_train = params['train'] - - # copy params into output directory - shutil.copy(params_file, '%s/params.json' % options.out_dir) - - # read data parameters - num_data = len(data_dirs) - data_stats_file = '%s/statistics.json' % data_dirs[0] - with open(data_stats_file) as data_stats_open: - data_stats = json.load(data_stats_open) - - # count folds - num_folds = len([dkey for dkey in data_stats if dkey.startswith('fold')]) - - # subset folds - if options.fold_subset is not None: - num_folds = min(options.fold_subset, num_folds) - - if options.queue == 'standard': - num_cpu = 8 - num_gpu = 0 - time_base = 64 - else: - num_cpu = 2 - num_gpu = 1 - time_base = 24 - - # arrange data - for ci in range(options.crosses): - for fi in range(num_folds): - rep_dir = '%s/f%dc%d' % (options.out_dir, fi, ci) - os.makedirs(rep_dir, exist_ok=True) - - # make data directories - for di in range(num_data): - rep_data_dir = '%s/data%d' % (rep_dir, di) - if not os.path.isdir(rep_data_dir): - make_rep_data(data_dirs[di], rep_data_dir, fi, ci) - - if options.setup: - exit(0) - - cmd_source = 'source /home/yuanh/.bashrc;' - hound_train = 'hound_train.py' - ####################################################### - # train - - jobs = [] - - for ci in range(options.crosses): - for fi in range(num_folds): - rep_dir = '%s/f%dc%d' % (options.out_dir, fi, ci) - - train_dir = '%s/train' % rep_dir - if options.restart and not options.checkpoint and os.path.isdir(train_dir): - print('%s found and skipped.' % rep_dir) - - else: - # collect data directories - rep_data_dirs = [] - for di in range(num_data): - rep_data_dirs.append('%s/data%d' % (rep_dir, di)) - - # if options.checkpoint: - # os.rename('%s/train.out' % rep_dir, '%s/train1.out' % rep_dir) - - # train command - cmd = cmd_source - cmd += ' conda activate %s;' % options.conda_env - cmd += ' echo $HOSTNAME;' - - cmd += ' %s' %hound_train - cmd += ' %s' % options_string(options, train_options, rep_dir) - - # transfer learning options - if options.transfer: - cmd += ' --restore %s/f%dc%d.h5' % (options.pretrain, fi, ci) - cmd += ' --trunk' - cmd += ' --transfer_mode %s' % options.transfer_mode - if options.latent!=0: - cmd += ' --latent %d' % options.latent - - cmd += ' %s %s' % (params_file, ' '.join(rep_data_dirs)) - - name = '%s-train-f%dc%d' % (options.name, fi, ci) - sbf = os.path.abspath('%s/train.sb' % rep_dir) - outf = os.path.abspath('%s/train.%%j.out' % rep_dir) - errf = os.path.abspath('%s/train.%%j.err' % rep_dir) - - j = slurm.Job(cmd, name, - outf, errf, sbf, - queue=options.queue, - cpu=4, - gpu=params_train.get('num_gpu',1), - mem=30000, time='60-0:0:0') - jobs.append(j) - - slurm.multi_run(jobs, max_proc=options.processes, verbose=True, - launch_sleep=10, update_sleep=60) - - - ####################################################### - # evaluate training set - - jobs = [] - - if not options.eval_train_off: - for ci in range(options.crosses): - for fi in range(num_folds): - it_dir = '%s/f%dc%d' % (options.out_dir, fi, ci) - - for di in range(num_data): - if num_data == 1: - out_dir = '%s/eval_train' % it_dir - model_file = '%s/train/model_check.h5' % it_dir - else: - out_dir = '%s/eval%d_train' % (it_dir, di) - model_file = '%s/train/model%d_check.h5' % (it_dir, di) - - # check if done - acc_file = '%s/acc.txt' % out_dir - if os.path.isfile(acc_file): - print('%s already 
generated.' % acc_file) - else: - # hound evaluate - cmd = cmd_source - cmd += ' conda activate %s;' % options.conda_env - cmd += ' echo $HOSTNAME;' - cmd += ' hound_eval.py' - cmd += ' --head %d' % di - cmd += ' -o %s' % out_dir - if options.rc: - cmd += ' --rc' - if options.shifts: - cmd += ' --shifts %s' % options.shifts - cmd += ' --split train' - cmd += ' %s' % params_file - cmd += ' %s' % model_file - cmd += ' %s/data%d' % (it_dir, di) - - name = '%s-evaltr-f%dc%d' % (options.name, fi, ci) - job = slurm.Job(cmd, - name=name, - out_file='%s.out'%out_dir, - err_file='%s.err'%out_dir, - queue=options.queue, - cpu=num_cpu, gpu=num_gpu, - mem=30000, - time='%d:00:00' % (3*time_base)) - jobs.append(job) - - - ####################################################### - # evaluate test set - - if not options.eval_off: - for ci in range(options.crosses): - for fi in range(num_folds): - it_dir = '%s/f%dc%d' % (options.out_dir, fi, ci) - - for di in range(num_data): - if num_data == 1: - out_dir = '%s/eval' % it_dir - model_file = '%s/train/model_best.h5' % it_dir - else: - out_dir = '%s/eval%d' % (it_dir, di) - model_file = '%s/train/model%d_best.h5' % (it_dir, di) - - # check if done - acc_file = '%s/acc.txt' % out_dir - if os.path.isfile(acc_file): - print('%s already generated.' % acc_file) - else: - cmd = cmd_source - cmd += ' conda activate %s;' % options.conda_env - cmd += ' echo $HOSTNAME;' - cmd += ' hound_eval.py' - cmd += ' --head %d' % di - cmd += ' -o %s' % out_dir - if options.rc: - cmd += ' --rc' - if options.shifts: - cmd += ' --shifts %s' % options.shifts - if options.rank_corr: - cmd += ' --rank' - cmd += ' --step %d' % options.step - cmd += ' %s' % params_file - cmd += ' %s' % model_file - cmd += ' %s/data%d' % (it_dir, di) - - name = '%s-eval-f%dc%d' % (options.name, fi, ci) - job = slurm.Job(cmd, - name=name, - out_file='%s.out'%out_dir, - err_file='%s.err'%out_dir, - queue=options.queue, - cpu=num_cpu, gpu=num_gpu, - mem=30000, - time='%d:00:00' % time_base) - jobs.append(job) - - ####################################################### - # evaluate test specificity - - if not options.spec_off: - for ci in range(options.crosses): - for fi in range(num_folds): - it_dir = '%s/f%dc%d' % (options.out_dir, fi, ci) - - for di in range(num_data): - if num_data == 1: - out_dir = '%s/eval_spec' % it_dir - model_file = '%s/train/model_best.h5' % it_dir - else: - out_dir = '%s/eval%d_spec' % (it_dir, di) - model_file = '%s/train/model%d_best.h5' % (it_dir, di) - - # check if done - acc_file = '%s/acc.txt' % out_dir - if os.path.isfile(acc_file): - print('%s already generated.' 
% acc_file) - else: - cmd = cmd_source - cmd += ' conda activate %s;' % options.conda_env - cmd += ' echo $HOSTNAME;' - cmd += ' hound_eval_spec.py' - cmd += ' --head %d' % di - cmd += ' -o %s' % out_dir - cmd += ' --step %d' % options.step - if options.rc: - cmd += ' --rc' - if options.shifts: - cmd += ' --shifts %s' % options.shifts - cmd += ' %s' % params_file - cmd += ' %s' % model_file - cmd += ' %s/data%d' % (it_dir, di) - - name = '%s-spec-f%dc%d' % (options.name, fi, ci) - job = slurm.Job(cmd, - name=name, - out_file='%s.out'%out_dir, - err_file='%s.err'%out_dir, - queue=options.queue, - cpu=num_cpu, gpu=num_gpu, - mem=150000, - time='%d:00:00' % (5*time_base)) - jobs.append(job) - - slurm.multi_run(jobs, max_proc=options.processes, verbose=True, - launch_sleep=10, update_sleep=60) - - -def make_rep_data(data_dir, rep_data_dir, fi, ci): - # read data parameters - data_stats_file = '%s/statistics.json' % data_dir - with open(data_stats_file) as data_stats_open: - data_stats = json.load(data_stats_open) - - # sequences per fold - fold_seqs = [] - dfi = 0 - while 'fold%d_seqs'%dfi in data_stats: - fold_seqs.append(data_stats['fold%d_seqs'%dfi]) - del data_stats['fold%d_seqs'%dfi] - dfi += 1 - num_folds = dfi - - # split folds into train/valid/test - test_fold = fi - valid_fold = (fi+1+ci) % num_folds - train_folds = [fold for fold in range(num_folds) if fold not in [valid_fold,test_fold]] - - # clear existing directory - if os.path.isdir(rep_data_dir): - shutil.rmtree(rep_data_dir) - - # make data directory - os.makedirs(rep_data_dir, exist_ok=True) - - # dump data stats - data_stats['test_seqs'] = fold_seqs[test_fold] - data_stats['valid_seqs'] = fold_seqs[valid_fold] - data_stats['train_seqs'] = sum([fold_seqs[tf] for tf in train_folds]) - with open('%s/statistics.json'%rep_data_dir, 'w') as data_stats_open: - json.dump(data_stats, data_stats_open, indent=4) - - # set sequence tvt - try: - seqs_bed_out = open('%s/sequences.bed'%rep_data_dir, 'w') - for line in open('%s/sequences.bed'%data_dir): - a = line.split() - sfi = int(a[-1].replace('fold','')) - if sfi == test_fold: - a[-1] = 'test' - elif sfi == valid_fold: - a[-1] = 'valid' - else: - a[-1] = 'train' - print('\t'.join(a), file=seqs_bed_out) - seqs_bed_out.close() - except (ValueError, FileNotFoundError): - pass - - # copy targets - shutil.copy('%s/targets.txt'%data_dir, '%s/targets.txt'%rep_data_dir) - - # sym link tfrecords - rep_tfr_dir = '%s/tfrecords' % rep_data_dir - os.mkdir(rep_tfr_dir) - - # test tfrecords - ti = 0 - test_tfrs = natsorted(glob.glob('%s/tfrecords/fold%d-*.tfr' % (data_dir, test_fold))) - for test_tfr in test_tfrs: - test_tfr = os.path.abspath(test_tfr) - test_rep_tfr = '%s/test-%d.tfr' % (rep_tfr_dir, ti) - os.symlink(test_tfr, test_rep_tfr) - ti += 1 - - # valid tfrecords - ti = 0 - valid_tfrs = natsorted(glob.glob('%s/tfrecords/fold%d-*.tfr' % (data_dir, valid_fold))) - for valid_tfr in valid_tfrs: - valid_tfr = os.path.abspath(valid_tfr) - valid_rep_tfr = '%s/valid-%d.tfr' % (rep_tfr_dir, ti) - os.symlink(valid_tfr, valid_rep_tfr) - ti += 1 - - # train tfrecords - ti = 0 - train_tfrs = [] - for tfi in train_folds: - train_tfrs += natsorted(glob.glob('%s/tfrecords/fold%d-*.tfr' % (data_dir, tfi))) - for train_tfr in train_tfrs: - train_tfr = os.path.abspath(train_tfr) - train_rep_tfr = '%s/train-%d.tfr' % (rep_tfr_dir, ti) - os.symlink(train_tfr, train_rep_tfr) - ti += 1 - - -def options_string(options, train_options, rep_dir): - options_str = '' - - for opt in train_options.option_list: - opt_str = 
opt.get_opt_string() - opt_value = options.__dict__[opt.dest] - - # wrap askeriks in "" - if type(opt_value) == str and opt_value.find('*') != -1: - opt_value = '"%s"' % opt_value - - # no value for bools - elif type(opt_value) == bool: - if not opt_value: - opt_str = '' - opt_value = '' - - # skip Nones - elif opt_value is None: - opt_str = '' - opt_value = '' - - # modify - elif opt.dest == 'out_dir': - opt_value = '%s/train' % rep_dir - - # find matching restore - elif opt.dest == 'restore': - fold_dir_mid = rep_dir.split('/')[-1] - if options.trunk: - opt_value = '%s/%s/train/model_trunk.h5' % (opt_value, fold_dir_mid) - else: - opt_value = '%s/%s/train/model_best.h5' % (opt_value, fold_dir_mid) - - options_str += ' %s %s' % (opt_str, opt_value) - - return options_str - - -################################################################################ -# __main__ -################################################################################ -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/src/baskerville/trainer.py b/src/baskerville/trainer.py index 6503815..d7c048e 100644 --- a/src/baskerville/trainer.py +++ b/src/baskerville/trainer.py @@ -723,8 +723,8 @@ def make_optimizer(self, loss_scale=False): else: # schedule (currently OFF) initial_learning_rate = self.params.get("learning_rate", 0.01) - if False: - lr_schedule = keras.optimizers.schedules.ExponentialDecay( + if self.params.get("decay_steps"): + lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay( initial_learning_rate, decay_steps=self.params.get("decay_steps", 100000), decay_rate=self.params.get("decay_rate", 0.96), @@ -778,7 +778,7 @@ def make_optimizer(self, loss_scale=False): global_clipnorm=global_clipnorm, amsgrad=False, ) # reduces performance in my experience - + elif optimizer_type in ["sgd", "momentum"]: self.optimizer = tf.keras.optimizers.SGD( learning_rate=lr_schedule,