From 75ad09ca31836114f9abf5268502ca4cd24fee25 Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Mon, 2 Aug 2021 18:40:46 -0400 Subject: [PATCH 01/22] ignore downloaded files --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 8e99f6cf..e7f5eb86 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,5 @@ basenji.egg-info/ */._.DS_Store **/.ipynb_checkpoints/ data/hg19.fa* +data/* +manuscripts/basset/model_basset \ No newline at end of file From 73993ba062f8fb72a83fba5fc4a007d8d15dfeac Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Mon, 2 Aug 2021 18:40:54 -0400 Subject: [PATCH 02/22] display basset default model --- manuscripts/basset/layers.txt | 98 +++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 manuscripts/basset/layers.txt diff --git a/manuscripts/basset/layers.txt b/manuscripts/basset/layers.txt new file mode 100644 index 00000000..e54fb7da --- /dev/null +++ b/manuscripts/basset/layers.txt @@ -0,0 +1,98 @@ +Model: "model_1" +__________________________________________________________________________________________________ +Layer (type) Output Shape Param # Connected to +================================================================================================== +sequence (InputLayer) [(None, 1344, 4)] 0 +__________________________________________________________________________________________________ +stochastic_reverse_complement ( ((None, 1344, 4), () 0 sequence[0][0] +__________________________________________________________________________________________________ +stochastic_shift (StochasticShi (None, 1344, 4) 0 stochastic_reverse_complement[0][ +__________________________________________________________________________________________________ +gelu (GELU) (None, 1344, 4) 0 stochastic_shift[0][0] +__________________________________________________________________________________________________ +conv1d (Conv1D) (None, 1344, 288) 19584 gelu[0][0] +__________________________________________________________________________________________________ +batch_normalization (BatchNorma (None, 1344, 288) 1152 conv1d[0][0] +__________________________________________________________________________________________________ +max_pooling1d (MaxPooling1D) (None, 448, 288) 0 batch_normalization[0][0] +__________________________________________________________________________________________________ +gelu_1 (GELU) (None, 448, 288) 0 max_pooling1d[0][0] +__________________________________________________________________________________________________ +conv1d_1 (Conv1D) (None, 448, 288) 414720 gelu_1[0][0] +__________________________________________________________________________________________________ +batch_normalization_1 (BatchNor (None, 448, 288) 1152 conv1d_1[0][0] +__________________________________________________________________________________________________ +max_pooling1d_1 (MaxPooling1D) (None, 224, 288) 0 batch_normalization_1[0][0] +__________________________________________________________________________________________________ +gelu_2 (GELU) (None, 224, 288) 0 max_pooling1d_1[0][0] +__________________________________________________________________________________________________ +conv1d_2 (Conv1D) (None, 224, 323) 465120 gelu_2[0][0] +__________________________________________________________________________________________________ +batch_normalization_2 (BatchNor (None, 224, 323) 1292 conv1d_2[0][0] +__________________________________________________________________________________________________ +max_pooling1d_2 (MaxPooling1D) (None, 112, 323) 0 batch_normalization_2[0][0] +__________________________________________________________________________________________________ +gelu_3 (GELU) (None, 112, 323) 0 max_pooling1d_2[0][0] +__________________________________________________________________________________________________ +conv1d_3 (Conv1D) (None, 112, 363) 586245 gelu_3[0][0] +__________________________________________________________________________________________________ +batch_normalization_3 (BatchNor (None, 112, 363) 1452 conv1d_3[0][0] +__________________________________________________________________________________________________ +max_pooling1d_3 (MaxPooling1D) (None, 56, 363) 0 batch_normalization_3[0][0] +__________________________________________________________________________________________________ +gelu_4 (GELU) (None, 56, 363) 0 max_pooling1d_3[0][0] +__________________________________________________________________________________________________ +conv1d_4 (Conv1D) (None, 56, 407) 738705 gelu_4[0][0] +__________________________________________________________________________________________________ +batch_normalization_4 (BatchNor (None, 56, 407) 1628 conv1d_4[0][0] +__________________________________________________________________________________________________ +max_pooling1d_4 (MaxPooling1D) (None, 28, 407) 0 batch_normalization_4[0][0] +__________________________________________________________________________________________________ +gelu_5 (GELU) (None, 28, 407) 0 max_pooling1d_4[0][0] +__________________________________________________________________________________________________ +conv1d_5 (Conv1D) (None, 28, 456) 927960 gelu_5[0][0] +__________________________________________________________________________________________________ +batch_normalization_5 (BatchNor (None, 28, 456) 1824 conv1d_5[0][0] +__________________________________________________________________________________________________ +max_pooling1d_5 (MaxPooling1D) (None, 14, 456) 0 batch_normalization_5[0][0] +__________________________________________________________________________________________________ +gelu_6 (GELU) (None, 14, 456) 0 max_pooling1d_5[0][0] +__________________________________________________________________________________________________ +conv1d_6 (Conv1D) (None, 14, 512) 1167360 gelu_6[0][0] +__________________________________________________________________________________________________ +batch_normalization_6 (BatchNor (None, 14, 512) 2048 conv1d_6[0][0] +__________________________________________________________________________________________________ +max_pooling1d_6 (MaxPooling1D) (None, 7, 512) 0 batch_normalization_6[0][0] +__________________________________________________________________________________________________ +gelu_7 (GELU) (None, 7, 512) 0 max_pooling1d_6[0][0] +__________________________________________________________________________________________________ +conv1d_7 (Conv1D) (None, 7, 256) 131072 gelu_7[0][0] +__________________________________________________________________________________________________ +batch_normalization_7 (BatchNor (None, 7, 256) 1024 conv1d_7[0][0] +__________________________________________________________________________________________________ +gelu_8 (GELU) (None, 7, 256) 0 batch_normalization_7[0][0] +__________________________________________________________________________________________________ +reshape (Reshape) (None, 1, 1792) 0 gelu_8[0][0] +__________________________________________________________________________________________________ +dense (Dense) (None, 1, 768) 1376256 reshape[0][0] +__________________________________________________________________________________________________ +batch_normalization_8 (BatchNor (None, 1, 768) 3072 dense[0][0] +__________________________________________________________________________________________________ +dropout (Dropout) (None, 1, 768) 0 batch_normalization_8[0][0] +__________________________________________________________________________________________________ +gelu_9 (GELU) (None, 1, 768) 0 dropout[0][0] +__________________________________________________________________________________________________ +dense_1 (Dense) (None, 1, 164) 126116 gelu_9[0][0] +__________________________________________________________________________________________________ +switch_reverse (SwitchReverse) (None, 1, 164) 0 dense_1[0][0] + stochastic_reverse_complement[0][ +================================================================================================== +Total params: 5,967,782 +Trainable params: 5,960,460 +Non-trainable params: 7,322 +__________________________________________________________________________________________________ +None +model_strides [192] +target_lengths [1] +target_crops [3] +No checkpoints found. From 8548ec06d3a20b21e64b7ac5e1e27f1b43229579 Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Mon, 2 Aug 2021 18:41:09 -0400 Subject: [PATCH 03/22] directly invoke python in script --- manuscripts/basset/make_dataset.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manuscripts/basset/make_dataset.sh b/manuscripts/basset/make_dataset.sh index d11dca26..f8a20925 100755 --- a/manuscripts/basset/make_dataset.sh +++ b/manuscripts/basset/make_dataset.sh @@ -16,4 +16,4 @@ then mv wgEncodeHg19ConsensusSignalArtifactRegions.bed data/ fi -basenji_data.py -b $blacklist --local -p 8 -r 4096 -w 192 -l 1344 --peaks -v .12 -t .12 --stride 192 --stride_test 192 --crop 576 -o data_basset $fasta data/targets.txt +python3 ../../bin/basenji_data.py -b $blacklist --local -p 8 -r 4096 -w 192 -l 1344 --peaks -v .12 -t .12 --stride 192 --stride_test 192 --crop 576 -o data_basset $fasta data/targets.txt From 198edb08bbfe113928d168b15a0b818a4dd5fb66 Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Mon, 2 Aug 2021 18:41:30 -0400 Subject: [PATCH 04/22] misc cleanup files --- manuscripts/basset/models/heart/params.json | 57 +++++++++++++++++++++ manuscripts/basset/path.rc | 1 + 2 files changed, 58 insertions(+) create mode 100644 manuscripts/basset/models/heart/params.json create mode 100644 manuscripts/basset/path.rc diff --git a/manuscripts/basset/models/heart/params.json b/manuscripts/basset/models/heart/params.json new file mode 100644 index 00000000..9c0a3f8d --- /dev/null +++ b/manuscripts/basset/models/heart/params.json @@ -0,0 +1,57 @@ +{ + "train": { + "batch_size": 64, + "shuffle_buffer": 8192, + "optimizer": "sgd", + "loss": "bce", + "learning_rate": 0.005, + "momentum": 0.98, + "patience": 12, + "train_epochs_min": 10 + }, + "model": { + "seq_length": 1344, + + "augment_rc": true, + "augment_shift": 3, + + "activation": "gelu", + "batch_norm": true, + "bn_momentum": 0.90, + + "trunk": [ + { + "name": "conv_block", + "filters": 288, + "kernel_size": 17, + "pool_size": 3 + }, + { + "name": "conv_tower", + "filters_init": 288, + "filters_mult": 1.122, + "kernel_size": 5, + "pool_size": 2, + "repeat": 6 + }, + { + "name": "conv_block", + "filters": 256, + "kernel_size": 1 + }, + { + "name": "dense_block", + "flatten": true, + "units": 768, + "dropout": 0.2 + } + ], + "head": [ + { + "name": "final", + "units": 164, + "activation": "sigmoid" + } + ] + } +} diff --git a/manuscripts/basset/path.rc b/manuscripts/basset/path.rc new file mode 100644 index 00000000..0d5d6a06 --- /dev/null +++ b/manuscripts/basset/path.rc @@ -0,0 +1 @@ +export PATH=$PATH:/home/moody/basset/basenji/bin \ No newline at end of file From 9781599162b6edacd8615ed887732dd12d737c87 Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Wed, 4 Aug 2021 11:54:21 -0400 Subject: [PATCH 05/22] annotated basset configs --- manuscripts/basset/generic_params.json | 57 ++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 manuscripts/basset/generic_params.json diff --git a/manuscripts/basset/generic_params.json b/manuscripts/basset/generic_params.json new file mode 100644 index 00000000..98aed24a --- /dev/null +++ b/manuscripts/basset/generic_params.json @@ -0,0 +1,57 @@ +{ + "train": { + "batch_size": 64 [int], + "shuffle_buffer": 8192 [int], + "optimizer": "sgd" [enum], + "loss": "bce" [enum], + "learning_rate": 0.005 [float], + "momentum": 0.98 [float], + "patience": 12 [int], + "train_epochs_min": 10 [int] + }, + "model": { + "seq_length": 1344 [int], + + "augment_rc": true [bool], + "augment_shift": 3 [int], + + "activation": "gelu" [enum], + "batch_norm": true [bool], + "bn_momentum": 0.90 [float], + + "trunk": [ + { + "name": "conv_block", + "filters": 288, + "kernel_size": 17, + "pool_size": 3 + }, + { + "name": "conv_tower", + "filters_init": 288, + "filters_mult": 1.122, + "kernel_size": 5, + "pool_size": 2, + "repeat": 6 + }, + { + "name": "conv_block", + "filters": 256, + "kernel_size": 1 + }, + { + "name": "dense_block", + "flatten": true, + "units": 768, + "dropout": 0.2 + } + ] [array: SeqNN block ()], + "head": [ + { + "name": "final", + "units": 164, + "activation": "sigmoid" + } + ] + } +} From ff867362e7c50102cf9ea74c7a84e991a12ce7fe Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Wed, 4 Aug 2021 11:55:18 -0400 Subject: [PATCH 06/22] docstring build_block --- basenji/seqnn.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/basenji/seqnn.py b/basenji/seqnn.py index b2a12b93..f26d2fba 100644 --- a/basenji/seqnn.py +++ b/basenji/seqnn.py @@ -43,10 +43,19 @@ def set_defaults(self): self.augment_shift = [0] def build_block(self, current, block_params): - """Construct a SeqNN block. + """ + Construct a SeqNN layer, and set it's previous layer to be the `current` + parameter. + Args: + current (tf.keras.layers.Layer): The previous later to attach this new + block to. + + block_params (dict): The parameters for this specific block, in order + of the elements of model[trunk] + Returns: - current + [type]: [description] """ block_args = {} From 94e0f329380f86c69e99dc69c4f0ba5c42662bc8 Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Wed, 4 Aug 2021 11:55:38 -0400 Subject: [PATCH 07/22] global_var in build_block explanation --- basenji/seqnn.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/basenji/seqnn.py b/basenji/seqnn.py index f26d2fba..6a4d5686 100644 --- a/basenji/seqnn.py +++ b/basenji/seqnn.py @@ -73,6 +73,8 @@ def build_block(self, current, block_params): block_varnames = block_func.__init__.__code__.co_varnames # set global defaults + # try to see if the SeqNN object has the following attributes: + # if they do, put them in a dictionary global_vars = ['activation', 'batch_norm', 'bn_momentum', 'bn_type', 'l2_scale', 'l1_scale', 'padding', 'kernel_initializer'] for gv in global_vars: From 6875716f59e265755baa37a3a61ecbf7b5b1d0f9 Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Wed, 4 Aug 2021 11:56:08 -0400 Subject: [PATCH 08/22] builkd_model documentation --- basenji/seqnn.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/basenji/seqnn.py b/basenji/seqnn.py index 6a4d5686..28035f8f 100644 --- a/basenji/seqnn.py +++ b/basenji/seqnn.py @@ -30,8 +30,11 @@ class SeqNN(): def __init__(self, params): self.set_defaults() + print("===+===") for key, value in params.items(): + print(key,value) self.__setattr__(key, value) + print("===+===") self.build_model() self.ensemble = None self.embed = None @@ -56,7 +59,7 @@ def build_block(self, current, block_params): Returns: [type]: [description] - """ + """ block_args = {} # extract name @@ -98,6 +101,16 @@ def build_block(self, current, block_params): return current def build_model(self, save_reprs=False): + """ + Using the functional API for tensorflow, programmatically builds a neural + network, according to the attributes set under the 'model' key in + the config JSON. + + see generic_params.json for further details + + Args: + save_reprs (bool, optional): [description]. Defaults to False. + """ ################################################### # inputs ################################################### @@ -114,6 +127,9 @@ def build_model(self, save_reprs=False): ################################################### # build convolution blocks ################################################### + # iterate through the elements of model[trunk], generate a layer, and attach + # it to the end of the previous layer + # this current variable references to the final layer specified in model[trunk] for bi, block_params in enumerate(self.trunk): current = self.build_block(current, block_params) @@ -187,6 +203,7 @@ def build_model(self, save_reprs=False): def build_embed(self, conv_layer_i, batch_norm=True): + if conv_layer_i == -1: self.embed = tf.keras.Model(inputs=self.model.inputs, outputs=self.model.inputs) From 0abb2ff01041872be6f221322f6aa8b5ad8715a3 Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Wed, 4 Aug 2021 11:56:24 -0400 Subject: [PATCH 09/22] training script comments --- bin/basenji_train.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bin/basenji_train.py b/bin/basenji_train.py index c0ea6e28..29948037 100755 --- a/bin/basenji_train.py +++ b/bin/basenji_train.py @@ -41,6 +41,9 @@ # main ################################################################################ def main(): + """ + runner script that builds a model according to JSON params, and trains it + """ usage = 'usage: %prog [options] ...' parser = OptionParser(usage) parser.add_option('-k', dest='keras_fit', @@ -108,6 +111,7 @@ def main(): # one GPU # initialize model + # the keys from params_model become direct attributes of seqnn_model seqnn_model = seqnn.SeqNN(params_model) # restore From 6abb24cc04889c35487cad11bb72d73d8f7ad0db Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Wed, 4 Aug 2021 12:11:48 -0400 Subject: [PATCH 10/22] moved generic parms to proper folder --- docs/generic_params.json | 57 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 docs/generic_params.json diff --git a/docs/generic_params.json b/docs/generic_params.json new file mode 100644 index 00000000..98aed24a --- /dev/null +++ b/docs/generic_params.json @@ -0,0 +1,57 @@ +{ + "train": { + "batch_size": 64 [int], + "shuffle_buffer": 8192 [int], + "optimizer": "sgd" [enum], + "loss": "bce" [enum], + "learning_rate": 0.005 [float], + "momentum": 0.98 [float], + "patience": 12 [int], + "train_epochs_min": 10 [int] + }, + "model": { + "seq_length": 1344 [int], + + "augment_rc": true [bool], + "augment_shift": 3 [int], + + "activation": "gelu" [enum], + "batch_norm": true [bool], + "bn_momentum": 0.90 [float], + + "trunk": [ + { + "name": "conv_block", + "filters": 288, + "kernel_size": 17, + "pool_size": 3 + }, + { + "name": "conv_tower", + "filters_init": 288, + "filters_mult": 1.122, + "kernel_size": 5, + "pool_size": 2, + "repeat": 6 + }, + { + "name": "conv_block", + "filters": 256, + "kernel_size": 1 + }, + { + "name": "dense_block", + "flatten": true, + "units": 768, + "dropout": 0.2 + } + ] [array: SeqNN block ()], + "head": [ + { + "name": "final", + "units": 164, + "activation": "sigmoid" + } + ] + } +} From fe7a3454c320ba39c7caad90069884fef6ca75a6 Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Mon, 9 Aug 2021 20:37:16 -0400 Subject: [PATCH 11/22] formatting --- manuscripts/basset/params_basset.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/manuscripts/basset/params_basset.json b/manuscripts/basset/params_basset.json index 9c0a3f8d..924ccd23 100644 --- a/manuscripts/basset/params_basset.json +++ b/manuscripts/basset/params_basset.json @@ -9,6 +9,7 @@ "patience": 12, "train_epochs_min": 10 }, + "model": { "seq_length": 1344, @@ -41,11 +42,12 @@ }, { "name": "dense_block", - "flatten": true, + "flatten": true, "units": 768, "dropout": 0.2 } ], + "head": [ { "name": "final", From cce77d5eaa54d0283aeb13d5c605c82bbbe739bf Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Mon, 9 Aug 2021 20:37:34 -0400 Subject: [PATCH 12/22] name_func explanation --- basenji/blocks.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/basenji/blocks.py b/basenji/blocks.py index 6f639265..888c514d 100644 --- a/basenji/blocks.py +++ b/basenji/blocks.py @@ -1258,6 +1258,9 @@ def concat_to_2d(inputs, **kwargs): ############################################################ # Dictionary ############################################################ +""" +dictionary that maps config strings to their definitions in this module +""" name_func = { 'attention': attention, 'center_slice': center_slice, From b2aaace38aa307b5530497e4be25bd56e1155a11 Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Mon, 9 Aug 2021 20:38:22 -0400 Subject: [PATCH 13/22] docs for seqnn.py --- basenji/seqnn.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/basenji/seqnn.py b/basenji/seqnn.py index 28035f8f..b9ef4982 100644 --- a/basenji/seqnn.py +++ b/basenji/seqnn.py @@ -30,11 +30,11 @@ class SeqNN(): def __init__(self, params): self.set_defaults() - print("===+===") + # print("===+===") for key, value in params.items(): print(key,value) self.__setattr__(key, value) - print("===+===") + # print("===+===") self.build_model() self.ensemble = None self.embed = None @@ -47,8 +47,8 @@ def set_defaults(self): def build_block(self, current, block_params): """ - Construct a SeqNN layer, and set it's previous layer to be the `current` - parameter. + Construct a SeqNN block (a series of layers), and set it's previous layer + to be the `current` parameter. Args: current (tf.keras.layers.Layer): The previous later to attach this new @@ -58,7 +58,7 @@ def build_block(self, current, block_params): of the elements of model[trunk] Returns: - [type]: [description] + tf.keras.layers.Layer: the final layer generated by this method """ block_args = {} @@ -77,7 +77,7 @@ def build_block(self, current, block_params): # set global defaults # try to see if the SeqNN object has the following attributes: - # if they do, put them in a dictionary + # if they do, put them in the dictionary block_args global_vars = ['activation', 'batch_norm', 'bn_momentum', 'bn_type', 'l2_scale', 'l1_scale', 'padding', 'kernel_initializer'] for gv in global_vars: @@ -90,6 +90,13 @@ def build_block(self, current, block_params): del block_args['name'] # switch for block + # get the desired next layer from the name_func dict + # set it to the next layer + # + # if name is capitalized, use the keras definition for that layer + # otherwise, use the layer in blocks.py + # + # pass all of the parameters associated with this block in as a keyword argument if block_name[0].islower(): block_func = blocks.name_func[block_name] current = block_func(current, **block_args) @@ -131,6 +138,8 @@ def build_model(self, save_reprs=False): # it to the end of the previous layer # this current variable references to the final layer specified in model[trunk] for bi, block_params in enumerate(self.trunk): + print(block_params) + print("======") current = self.build_block(current, block_params) # final activation @@ -143,6 +152,8 @@ def build_model(self, save_reprs=False): ################################################### # heads ################################################### + # iterate through the elements of params[head] and create those layers + # typically the final layer of the network head_keys = natsorted([v for v in vars(self) if v.startswith('head')]) self.heads = [getattr(self, hk) for hk in head_keys] From 8286fd3e675d4c454f855378243d1b3d3afa9cae Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Mon, 9 Aug 2021 20:38:54 -0400 Subject: [PATCH 14/22] test function for SeqNN functionality --- basenji/seqnn.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/basenji/seqnn.py b/basenji/seqnn.py index b9ef4982..347ff603 100644 --- a/basenji/seqnn.py +++ b/basenji/seqnn.py @@ -13,6 +13,7 @@ # limitations under the License. # ========================================================================= from __future__ import print_function +import json import pdb import sys @@ -395,3 +396,13 @@ def save(self, model_file, trunk=False): self.model_trunk.save(model_file, include_optimizer=False) else: self.model.save(model_file, include_optimizer=False) + +def main(): + with open("/home/moody/basset/basenji/manuscripts/basset/model_basset/params.json") as params_open: + params = json.load(params_open) + params_model = params['model'] + params_train = params['train'] + seqnn_model = SeqNN(params_model) + +if __name__ == "__main__": + main() \ No newline at end of file From d9994b532bb4ab343f645a928987555fb5eb0563 Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Mon, 23 Aug 2021 20:20:15 -0400 Subject: [PATCH 15/22] script to generate params_react.json --- analysis.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 analysis.py diff --git a/analysis.py b/analysis.py new file mode 100644 index 00000000..a14fc8b9 --- /dev/null +++ b/analysis.py @@ -0,0 +1,38 @@ +from inspect import getmembers, isfunction, getargspec +import basenji.blocks +import json + +m = [(x[0], getargspec(x[1])) for x in getmembers(basenji.blocks, isfunction)] + +out = [] + +for function in m: + entry = {} + entry["params"] = [] + argspec = function[1] + + # names + rev_args = argspec.args[::-1] + + # values + print(type(argspec.defaults)) + print(function[0]) + rev_defaults = argspec.defaults[::-1] if argspec.defaults is not None else [] + + for n, x in enumerate(rev_defaults): + entry["params"].append({ + "name":rev_args[n], + "value":x + }) + + entry["kwargs"] = argspec.keywords + entry["function_name"] = function[0] + + out.append(entry) + # out[function[0]] = function[1] + +# print(out["conv_dna"]) + + +file = open("params_react.json", "w") +file.write(json.dumps(out, indent=4, sort_keys=True)) From f16f1b861926588910bd3314cb3a02275f43e55d Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Mon, 23 Aug 2021 20:20:47 -0400 Subject: [PATCH 16/22] generated function definitions for the react frontend --- params_react.json | 911 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 911 insertions(+) create mode 100644 params_react.json diff --git a/params_react.json b/params_react.json new file mode 100644 index 00000000..4d70e5a1 --- /dev/null +++ b/params_react.json @@ -0,0 +1,911 @@ +[ + { + "function_name": "attention", + "kwargs": "kwargs", + "params": [ + { + "name": "bn_type", + "value": "standard" + }, + { + "name": "bn_momentum", + "value": 0.99 + }, + { + "name": "batch_norm", + "value": false + }, + { + "name": "max_relative_position", + "value": 64 + }, + { + "name": "kq_depth", + "value": null + } + ] + }, + { + "function_name": "average_pooling", + "kwargs": "kwargs", + "params": [ + { + "name": "pool_size", + "value": 2 + } + ] + }, + { + "function_name": "average_to_2d", + "kwargs": "kwargs", + "params": [] + }, + { + "function_name": "center_average", + "kwargs": "kwargs", + "params": [] + }, + { + "function_name": "center_slice", + "kwargs": "kwargs", + "params": [] + }, + { + "function_name": "concat_dist_2d", + "kwargs": "kwargs", + "params": [] + }, + { + "function_name": "concat_position", + "kwargs": "kwargs", + "params": [ + { + "name": "power", + "value": 1 + }, + { + "name": "transform", + "value": "abs" + } + ] + }, + { + "function_name": "concat_to_2d", + "kwargs": "kwargs", + "params": [] + }, + { + "function_name": "conv_block", + "kwargs": null, + "params": [ + { + "name": "padding", + "value": "same" + }, + { + "name": "kernel_initializer", + "value": "he_normal" + }, + { + "name": "bn_type", + "value": "standard" + }, + { + "name": "bn_gamma", + "value": null + }, + { + "name": "bn_momentum", + "value": 0.99 + }, + { + "name": "batch_norm", + "value": false + }, + { + "name": "pool_size", + "value": 1 + }, + { + "name": "residual", + "value": false + }, + { + "name": "conv_type", + "value": "standard" + }, + { + "name": "dropout", + "value": 0 + }, + { + "name": "l2_scale", + "value": 0 + }, + { + "name": "dilation_rate", + "value": 1 + }, + { + "name": "strides", + "value": 1 + }, + { + "name": "activation_end", + "value": null + }, + { + "name": "activation", + "value": "relu" + }, + { + "name": "kernel_size", + "value": 1 + }, + { + "name": "filters", + "value": null + } + ] + }, + { + "function_name": "conv_block_2d", + "kwargs": null, + "params": [ + { + "name": "symmetric", + "value": false + }, + { + "name": "kernel_initializer", + "value": "he_normal" + }, + { + "name": "bn_type", + "value": "standard" + }, + { + "name": "bn_gamma", + "value": "ones" + }, + { + "name": "bn_momentum", + "value": 0.99 + }, + { + "name": "batch_norm", + "value": false + }, + { + "name": "pool_size", + "value": 1 + }, + { + "name": "dropout", + "value": 0 + }, + { + "name": "l2_scale", + "value": 0 + }, + { + "name": "dilation_rate", + "value": 1 + }, + { + "name": "strides", + "value": 1 + }, + { + "name": "kernel_size", + "value": 1 + }, + { + "name": "conv_type", + "value": "standard" + }, + { + "name": "activation", + "value": "relu" + }, + { + "name": "filters", + "value": 128 + } + ] + }, + { + "function_name": "conv_dna", + "kwargs": null, + "params": [ + { + "name": "padding", + "value": "same" + }, + { + "name": "kernel_initializer", + "value": "he_normal" + }, + { + "name": "conv_type", + "value": "standard" + }, + { + "name": "bn_type", + "value": "standard" + }, + { + "name": "bn_gamma", + "value": null + }, + { + "name": "bn_momentum", + "value": 0.99 + }, + { + "name": "batch_norm", + "value": false + }, + { + "name": "pool_size", + "value": 1 + }, + { + "name": "dropout_residual", + "value": 0 + }, + { + "name": "dropout", + "value": 0 + }, + { + "name": "residual", + "value": false + }, + { + "name": "l2_scale", + "value": 0 + }, + { + "name": "strides", + "value": 1 + }, + { + "name": "activation", + "value": "relu" + }, + { + "name": "kernel_size", + "value": 15 + }, + { + "name": "filters", + "value": null + } + ] + }, + { + "function_name": "conv_nac", + "kwargs": null, + "params": [ + { + "name": "padding", + "value": "same" + }, + { + "name": "kernel_initializer", + "value": "he_normal" + }, + { + "name": "bn_type", + "value": "standard" + }, + { + "name": "bn_gamma", + "value": null + }, + { + "name": "bn_momentum", + "value": 0.99 + }, + { + "name": "batch_norm", + "value": false + }, + { + "name": "pool_size", + "value": 1 + }, + { + "name": "residual", + "value": false + }, + { + "name": "conv_type", + "value": "standard" + }, + { + "name": "dropout", + "value": 0 + }, + { + "name": "l2_scale", + "value": 0 + }, + { + "name": "dilation_rate", + "value": 1 + }, + { + "name": "strides", + "value": 1 + }, + { + "name": "activation", + "value": "relu" + }, + { + "name": "kernel_size", + "value": 1 + }, + { + "name": "filters", + "value": null + } + ] + }, + { + "function_name": "conv_tower", + "kwargs": "kwargs", + "params": [ + { + "name": "repeat", + "value": 1 + }, + { + "name": "divisible_by", + "value": 1 + }, + { + "name": "filters_mult", + "value": null + }, + { + "name": "filters_end", + "value": null + } + ] + }, + { + "function_name": "conv_tower_nac", + "kwargs": "kwargs", + "params": [ + { + "name": "repeat", + "value": 1 + }, + { + "name": "divisible_by", + "value": 1 + }, + { + "name": "filters_mult", + "value": null + }, + { + "name": "filters_end", + "value": null + } + ] + }, + { + "function_name": "conv_tower_v1", + "kwargs": "kwargs", + "params": [ + { + "name": "repeat", + "value": 1 + }, + { + "name": "filters_mult", + "value": 1 + } + ] + }, + { + "function_name": "cropping_2d", + "kwargs": "kwargs", + "params": [] + }, + { + "function_name": "dense", + "kwargs": "kwargs", + "params": [ + { + "name": "l1_scale", + "value": 0 + }, + { + "name": "l2_scale", + "value": 0 + }, + { + "name": "kernel_initializer", + "value": "he_normal" + }, + { + "name": "activation", + "value": "linear" + } + ] + }, + { + "function_name": "dense_block", + "kwargs": "kwargs", + "params": [ + { + "name": "kernel_initializer", + "value": "he_normal" + }, + { + "name": "bn_type", + "value": "standard" + }, + { + "name": "bn_gamma", + "value": null + }, + { + "name": "bn_momentum", + "value": 0.99 + }, + { + "name": "batch_norm", + "value": false + }, + { + "name": "residual", + "value": false + }, + { + "name": "l1_scale", + "value": 0 + }, + { + "name": "l2_scale", + "value": 0 + }, + { + "name": "dropout", + "value": 0 + }, + { + "name": "flatten", + "value": false + }, + { + "name": "activation_end", + "value": null + }, + { + "name": "activation", + "value": "relu" + }, + { + "name": "units", + "value": null + } + ] + }, + { + "function_name": "dilated_dense", + "kwargs": "kwargs", + "params": [ + { + "name": "repeat", + "value": 1 + }, + { + "name": "dropout", + "value": 0 + }, + { + "name": "conv_type", + "value": "standard" + }, + { + "name": "rate_mult", + "value": 2 + }, + { + "name": "kernel_size", + "value": 3 + } + ] + }, + { + "function_name": "dilated_residual", + "kwargs": "kwargs", + "params": [ + { + "name": "round", + "value": false + }, + { + "name": "batch_norm", + "value": false + }, + { + "name": "conv_type", + "value": "standard" + }, + { + "name": "repeat", + "value": 1 + }, + { + "name": "dropout", + "value": 0 + }, + { + "name": "rate_mult", + "value": 2 + }, + { + "name": "kernel_size", + "value": 3 + } + ] + }, + { + "function_name": "dilated_residual_2d", + "kwargs": "kwargs", + "params": [ + { + "name": "symmetric", + "value": true + }, + { + "name": "repeat", + "value": 1 + }, + { + "name": "dropout", + "value": 0 + }, + { + "name": "rate_mult", + "value": 2 + }, + { + "name": "kernel_size", + "value": 3 + } + ] + }, + { + "function_name": "dilated_residual_nac", + "kwargs": "kwargs", + "params": [ + { + "name": "repeat", + "value": 1 + }, + { + "name": "dropout", + "value": 0 + }, + { + "name": "rate_mult", + "value": 2 + }, + { + "name": "kernel_size", + "value": 3 + } + ] + }, + { + "function_name": "dot_to_2d", + "kwargs": "kwargs", + "params": [] + }, + { + "function_name": "exp", + "kwargs": "kwargs", + "params": [ + { + "name": "minus", + "value": null + }, + { + "name": "base", + "value": null + } + ] + }, + { + "function_name": "factor_inverse", + "kwargs": "kwargs", + "params": [] + }, + { + "function_name": "final", + "kwargs": "kwargs", + "params": [ + { + "name": "l1_scale", + "value": 0 + }, + { + "name": "l2_scale", + "value": 0 + }, + { + "name": "kernel_initializer", + "value": "he_normal" + }, + { + "name": "flatten", + "value": false + }, + { + "name": "activation", + "value": "linear" + } + ] + }, + { + "function_name": "geodot_to_2d", + "kwargs": "kwargs", + "params": [] + }, + { + "function_name": "global_context", + "kwargs": "kwargs", + "params": [] + }, + { + "function_name": "max_to_2d", + "kwargs": "kwargs", + "params": [] + }, + { + "function_name": "multihead_attention", + "kwargs": "kwargs", + "params": [ + { + "name": "dense_expansion", + "value": 0 + }, + { + "name": "dropout", + "value": 0 + }, + { + "name": "position_dropout", + "value": 0 + }, + { + "name": "attention_dropout", + "value": 0 + }, + { + "name": "bn_momentum", + "value": 0.9 + }, + { + "name": "activation", + "value": "relu" + }, + { + "name": "num_position_features", + "value": null + }, + { + "name": "out_size", + "value": null + }, + { + "name": "heads", + "value": 1 + }, + { + "name": "key_size", + "value": null + } + ] + }, + { + "function_name": "one_to_two", + "kwargs": "kwargs", + "params": [ + { + "name": "operation", + "value": "mean" + } + ] + }, + { + "function_name": "position_encoding", + "kwargs": null, + "params": [ + { + "name": "min_rate", + "value": 0.0001 + } + ] + }, + { + "function_name": "res_tower", + "kwargs": "kwargs", + "params": [ + { + "name": "num_convs", + "value": 2 + }, + { + "name": "repeat", + "value": 1 + }, + { + "name": "divisible_by", + "value": 1 + }, + { + "name": "pool_size", + "value": 2 + }, + { + "name": "dropout", + "value": 0 + }, + { + "name": "kernel_size", + "value": 1 + }, + { + "name": "filters_mult", + "value": null + }, + { + "name": "filters_end", + "value": null + } + ] + }, + { + "function_name": "squeeze_excite", + "kwargs": "kwargs", + "params": [ + { + "name": "bn_momentum", + "value": 0.9 + }, + { + "name": "batch_norm", + "value": false + }, + { + "name": "additive", + "value": false + }, + { + "name": "bottleneck_ratio", + "value": 8 + }, + { + "name": "activation", + "value": "relu" + } + ] + }, + { + "function_name": "symmetrize_2d", + "kwargs": "kwargs", + "params": [] + }, + { + "function_name": "transformer", + "kwargs": "kwargs", + "params": [ + { + "name": "dropout", + "value": 0.25 + }, + { + "name": "position_dropout", + "value": 0.01 + }, + { + "name": "attention_dropout", + "value": 0.05 + }, + { + "name": "dense_expansion", + "value": 2.0 + }, + { + "name": "activation", + "value": "relu" + }, + { + "name": "num_position_features", + "value": null + }, + { + "name": "out_size", + "value": null + }, + { + "name": "heads", + "value": 1 + }, + { + "name": "key_size", + "value": null + } + ] + }, + { + "function_name": "transformer_tower", + "kwargs": "kwargs", + "params": [ + { + "name": "repeat", + "value": 2 + } + ] + }, + { + "function_name": "upper_tri", + "kwargs": "kwargs", + "params": [ + { + "name": "diagonal_offset", + "value": 2 + } + ] + }, + { + "function_name": "wheeze_excite", + "kwargs": "kwargs", + "params": [] + }, + { + "function_name": "xception_block", + "kwargs": "kwargs", + "params": [ + { + "name": "pool_size", + "value": 2 + }, + { + "name": "dropout", + "value": 0 + }, + { + "name": "kernel_size", + "value": 1 + }, + { + "name": "filters", + "value": null + } + ] + }, + { + "function_name": "xception_tower", + "kwargs": "kwargs", + "params": [ + { + "name": "repeat", + "value": 1 + }, + { + "name": "filters_mult", + "value": 1 + } + ] + } +] \ No newline at end of file From ca48e8fa4a4fff24ff47e1a2d46847ac9b8be8f1 Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Mon, 23 Aug 2021 20:22:29 -0400 Subject: [PATCH 17/22] seqnn comments --- basenji/seqnn.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/basenji/seqnn.py b/basenji/seqnn.py index 347ff603..213f0a63 100644 --- a/basenji/seqnn.py +++ b/basenji/seqnn.py @@ -31,11 +31,8 @@ class SeqNN(): def __init__(self, params): self.set_defaults() - # print("===+===") for key, value in params.items(): - print(key,value) self.__setattr__(key, value) - # print("===+===") self.build_model() self.ensemble = None self.embed = None @@ -98,10 +95,13 @@ def build_block(self, current, block_params): # otherwise, use the layer in blocks.py # # pass all of the parameters associated with this block in as a keyword argument + + # basenji custom layer if block_name[0].islower(): block_func = blocks.name_func[block_name] current = block_func(current, **block_args) + # keras functional style layer else: block_func = blocks.keras_func[block_name] current = block_func(**block_args)(current) @@ -135,12 +135,9 @@ def build_model(self, save_reprs=False): ################################################### # build convolution blocks ################################################### - # iterate through the elements of model[trunk], generate a layer, and attach + # iterate through the elements of model[trunk], generate a block, and attach # it to the end of the previous layer - # this current variable references to the final layer specified in model[trunk] for bi, block_params in enumerate(self.trunk): - print(block_params) - print("======") current = self.build_block(current, block_params) # final activation From 00ec27fae95dfaead1585f1972630338c74c289b Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Mon, 23 Aug 2021 20:22:40 -0400 Subject: [PATCH 18/22] annotated params --- docs/generic_params.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/generic_params.json b/docs/generic_params.json index 98aed24a..c846a747 100644 --- a/docs/generic_params.json +++ b/docs/generic_params.json @@ -7,7 +7,8 @@ "learning_rate": 0.005 [float], "momentum": 0.98 [float], "patience": 12 [int], - "train_epochs_min": 10 [int] + "train_epochs_min": 10 [int: default 1], + "train_epochs_max": 10 [int: default 10000] }, "model": { "seq_length": 1344 [int], @@ -21,7 +22,7 @@ "trunk": [ { - "name": "conv_block", + "name": "conv_block", [enum: see module variables in blocks.py] "filters": 288, "kernel_size": 17, "pool_size": 3 From 97becc4fa576e781ee87888f304f6b333391c77f Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Mon, 23 Aug 2021 20:27:00 -0400 Subject: [PATCH 19/22] removed debug file --- manuscripts/basset/layers.txt | 98 ----------------------------------- 1 file changed, 98 deletions(-) delete mode 100644 manuscripts/basset/layers.txt diff --git a/manuscripts/basset/layers.txt b/manuscripts/basset/layers.txt deleted file mode 100644 index e54fb7da..00000000 --- a/manuscripts/basset/layers.txt +++ /dev/null @@ -1,98 +0,0 @@ -Model: "model_1" -__________________________________________________________________________________________________ -Layer (type) Output Shape Param # Connected to -================================================================================================== -sequence (InputLayer) [(None, 1344, 4)] 0 -__________________________________________________________________________________________________ -stochastic_reverse_complement ( ((None, 1344, 4), () 0 sequence[0][0] -__________________________________________________________________________________________________ -stochastic_shift (StochasticShi (None, 1344, 4) 0 stochastic_reverse_complement[0][ -__________________________________________________________________________________________________ -gelu (GELU) (None, 1344, 4) 0 stochastic_shift[0][0] -__________________________________________________________________________________________________ -conv1d (Conv1D) (None, 1344, 288) 19584 gelu[0][0] -__________________________________________________________________________________________________ -batch_normalization (BatchNorma (None, 1344, 288) 1152 conv1d[0][0] -__________________________________________________________________________________________________ -max_pooling1d (MaxPooling1D) (None, 448, 288) 0 batch_normalization[0][0] -__________________________________________________________________________________________________ -gelu_1 (GELU) (None, 448, 288) 0 max_pooling1d[0][0] -__________________________________________________________________________________________________ -conv1d_1 (Conv1D) (None, 448, 288) 414720 gelu_1[0][0] -__________________________________________________________________________________________________ -batch_normalization_1 (BatchNor (None, 448, 288) 1152 conv1d_1[0][0] -__________________________________________________________________________________________________ -max_pooling1d_1 (MaxPooling1D) (None, 224, 288) 0 batch_normalization_1[0][0] -__________________________________________________________________________________________________ -gelu_2 (GELU) (None, 224, 288) 0 max_pooling1d_1[0][0] -__________________________________________________________________________________________________ -conv1d_2 (Conv1D) (None, 224, 323) 465120 gelu_2[0][0] -__________________________________________________________________________________________________ -batch_normalization_2 (BatchNor (None, 224, 323) 1292 conv1d_2[0][0] -__________________________________________________________________________________________________ -max_pooling1d_2 (MaxPooling1D) (None, 112, 323) 0 batch_normalization_2[0][0] -__________________________________________________________________________________________________ -gelu_3 (GELU) (None, 112, 323) 0 max_pooling1d_2[0][0] -__________________________________________________________________________________________________ -conv1d_3 (Conv1D) (None, 112, 363) 586245 gelu_3[0][0] -__________________________________________________________________________________________________ -batch_normalization_3 (BatchNor (None, 112, 363) 1452 conv1d_3[0][0] -__________________________________________________________________________________________________ -max_pooling1d_3 (MaxPooling1D) (None, 56, 363) 0 batch_normalization_3[0][0] -__________________________________________________________________________________________________ -gelu_4 (GELU) (None, 56, 363) 0 max_pooling1d_3[0][0] -__________________________________________________________________________________________________ -conv1d_4 (Conv1D) (None, 56, 407) 738705 gelu_4[0][0] -__________________________________________________________________________________________________ -batch_normalization_4 (BatchNor (None, 56, 407) 1628 conv1d_4[0][0] -__________________________________________________________________________________________________ -max_pooling1d_4 (MaxPooling1D) (None, 28, 407) 0 batch_normalization_4[0][0] -__________________________________________________________________________________________________ -gelu_5 (GELU) (None, 28, 407) 0 max_pooling1d_4[0][0] -__________________________________________________________________________________________________ -conv1d_5 (Conv1D) (None, 28, 456) 927960 gelu_5[0][0] -__________________________________________________________________________________________________ -batch_normalization_5 (BatchNor (None, 28, 456) 1824 conv1d_5[0][0] -__________________________________________________________________________________________________ -max_pooling1d_5 (MaxPooling1D) (None, 14, 456) 0 batch_normalization_5[0][0] -__________________________________________________________________________________________________ -gelu_6 (GELU) (None, 14, 456) 0 max_pooling1d_5[0][0] -__________________________________________________________________________________________________ -conv1d_6 (Conv1D) (None, 14, 512) 1167360 gelu_6[0][0] -__________________________________________________________________________________________________ -batch_normalization_6 (BatchNor (None, 14, 512) 2048 conv1d_6[0][0] -__________________________________________________________________________________________________ -max_pooling1d_6 (MaxPooling1D) (None, 7, 512) 0 batch_normalization_6[0][0] -__________________________________________________________________________________________________ -gelu_7 (GELU) (None, 7, 512) 0 max_pooling1d_6[0][0] -__________________________________________________________________________________________________ -conv1d_7 (Conv1D) (None, 7, 256) 131072 gelu_7[0][0] -__________________________________________________________________________________________________ -batch_normalization_7 (BatchNor (None, 7, 256) 1024 conv1d_7[0][0] -__________________________________________________________________________________________________ -gelu_8 (GELU) (None, 7, 256) 0 batch_normalization_7[0][0] -__________________________________________________________________________________________________ -reshape (Reshape) (None, 1, 1792) 0 gelu_8[0][0] -__________________________________________________________________________________________________ -dense (Dense) (None, 1, 768) 1376256 reshape[0][0] -__________________________________________________________________________________________________ -batch_normalization_8 (BatchNor (None, 1, 768) 3072 dense[0][0] -__________________________________________________________________________________________________ -dropout (Dropout) (None, 1, 768) 0 batch_normalization_8[0][0] -__________________________________________________________________________________________________ -gelu_9 (GELU) (None, 1, 768) 0 dropout[0][0] -__________________________________________________________________________________________________ -dense_1 (Dense) (None, 1, 164) 126116 gelu_9[0][0] -__________________________________________________________________________________________________ -switch_reverse (SwitchReverse) (None, 1, 164) 0 dense_1[0][0] - stochastic_reverse_complement[0][ -================================================================================================== -Total params: 5,967,782 -Trainable params: 5,960,460 -Non-trainable params: 7,322 -__________________________________________________________________________________________________ -None -model_strides [192] -target_lengths [1] -target_crops [3] -No checkpoints found. From 0fb66324549bbd52c10ce714a1ad988e54ce2773 Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Mon, 23 Aug 2021 20:27:49 -0400 Subject: [PATCH 20/22] removed relative path call --- manuscripts/basset/make_dataset.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manuscripts/basset/make_dataset.sh b/manuscripts/basset/make_dataset.sh index f8a20925..d11dca26 100755 --- a/manuscripts/basset/make_dataset.sh +++ b/manuscripts/basset/make_dataset.sh @@ -16,4 +16,4 @@ then mv wgEncodeHg19ConsensusSignalArtifactRegions.bed data/ fi -python3 ../../bin/basenji_data.py -b $blacklist --local -p 8 -r 4096 -w 192 -l 1344 --peaks -v .12 -t .12 --stride 192 --stride_test 192 --crop 576 -o data_basset $fasta data/targets.txt +basenji_data.py -b $blacklist --local -p 8 -r 4096 -w 192 -l 1344 --peaks -v .12 -t .12 --stride 192 --stride_test 192 --crop 576 -o data_basset $fasta data/targets.txt From 040a5a7324d900d08eead3aa398a54ca65f9674b Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Mon, 23 Aug 2021 20:31:55 -0400 Subject: [PATCH 21/22] deleted unnecessary files --- analysis.py | 38 -- docs/generic_params.json | 3 + manuscripts/basset/generic_params.json | 57 -- manuscripts/basset/out.log | 83 +++ manuscripts/basset/path.rc | 1 - params_react.json | 911 ------------------------- 6 files changed, 86 insertions(+), 1007 deletions(-) delete mode 100644 analysis.py delete mode 100644 manuscripts/basset/generic_params.json create mode 100644 manuscripts/basset/out.log delete mode 100644 manuscripts/basset/path.rc delete mode 100644 params_react.json diff --git a/analysis.py b/analysis.py deleted file mode 100644 index a14fc8b9..00000000 --- a/analysis.py +++ /dev/null @@ -1,38 +0,0 @@ -from inspect import getmembers, isfunction, getargspec -import basenji.blocks -import json - -m = [(x[0], getargspec(x[1])) for x in getmembers(basenji.blocks, isfunction)] - -out = [] - -for function in m: - entry = {} - entry["params"] = [] - argspec = function[1] - - # names - rev_args = argspec.args[::-1] - - # values - print(type(argspec.defaults)) - print(function[0]) - rev_defaults = argspec.defaults[::-1] if argspec.defaults is not None else [] - - for n, x in enumerate(rev_defaults): - entry["params"].append({ - "name":rev_args[n], - "value":x - }) - - entry["kwargs"] = argspec.keywords - entry["function_name"] = function[0] - - out.append(entry) - # out[function[0]] = function[1] - -# print(out["conv_dna"]) - - -file = open("params_react.json", "w") -file.write(json.dumps(out, indent=4, sort_keys=True)) diff --git a/docs/generic_params.json b/docs/generic_params.json index c846a747..94e3a7ee 100644 --- a/docs/generic_params.json +++ b/docs/generic_params.json @@ -1,3 +1,6 @@ +// THIS IS NOT A VALID JSON PARAMTER FILE +// THIS IS AN ANNOTATED JSON SHOWING ALL THE POSSIBLE CONFIGURATIONS, AND WHAT THEIR TYPES ARE + { "train": { "batch_size": 64 [int], diff --git a/manuscripts/basset/generic_params.json b/manuscripts/basset/generic_params.json deleted file mode 100644 index 98aed24a..00000000 --- a/manuscripts/basset/generic_params.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "train": { - "batch_size": 64 [int], - "shuffle_buffer": 8192 [int], - "optimizer": "sgd" [enum], - "loss": "bce" [enum], - "learning_rate": 0.005 [float], - "momentum": 0.98 [float], - "patience": 12 [int], - "train_epochs_min": 10 [int] - }, - "model": { - "seq_length": 1344 [int], - - "augment_rc": true [bool], - "augment_shift": 3 [int], - - "activation": "gelu" [enum], - "batch_norm": true [bool], - "bn_momentum": 0.90 [float], - - "trunk": [ - { - "name": "conv_block", - "filters": 288, - "kernel_size": 17, - "pool_size": 3 - }, - { - "name": "conv_tower", - "filters_init": 288, - "filters_mult": 1.122, - "kernel_size": 5, - "pool_size": 2, - "repeat": 6 - }, - { - "name": "conv_block", - "filters": 256, - "kernel_size": 1 - }, - { - "name": "dense_block", - "flatten": true, - "units": 768, - "dropout": 0.2 - } - ] [array: SeqNN block ()], - "head": [ - { - "name": "final", - "units": 164, - "activation": "sigmoid" - } - ] - } -} diff --git a/manuscripts/basset/out.log b/manuscripts/basset/out.log new file mode 100644 index 00000000..c30a44df --- /dev/null +++ b/manuscripts/basset/out.log @@ -0,0 +1,83 @@ +Model: "model_1" +__________________________________________________________________________________________________ +Layer (type) Output Shape Param # Connected to +================================================================================================== +sequence (InputLayer) [(None, 1344, 4)] 0 +__________________________________________________________________________________________________ +stochastic_reverse_complement ( ((None, 1344, 4), () 0 sequence[0][0] +__________________________________________________________________________________________________ +stochastic_shift (StochasticShi (None, 1344, 4) 0 stochastic_reverse_complement[0][ +__________________________________________________________________________________________________ +gelu (GELU) (None, 1344, 4) 0 stochastic_shift[0][0] +__________________________________________________________________________________________________ +conv1d (Conv1D) (None, 1344, 288) 19584 gelu[0][0] +__________________________________________________________________________________________________ +batch_normalization (BatchNorma (None, 1344, 288) 1152 conv1d[0][0] +__________________________________________________________________________________________________ +max_pooling1d (MaxPooling1D) (None, 448, 288) 0 batch_normalization[0][0] +__________________________________________________________________________________________________ +gelu_1 (GELU) (None, 448, 288) 0 max_pooling1d[0][0] +__________________________________________________________________________________________________ +conv1d_1 (Conv1D) (None, 448, 288) 414720 gelu_1[0][0] +__________________________________________________________________________________________________ +batch_normalization_1 (BatchNor (None, 448, 288) 1152 conv1d_1[0][0] +__________________________________________________________________________________________________ +max_pooling1d_1 (MaxPooling1D) (None, 224, 288) 0 batch_normalization_1[0][0] +__________________________________________________________________________________________________ +gelu_2 (GELU) (None, 224, 288) 0 max_pooling1d_1[0][0] +__________________________________________________________________________________________________ +conv1d_2 (Conv1D) (None, 224, 323) 465120 gelu_2[0][0] +__________________________________________________________________________________________________ +batch_normalization_2 (BatchNor (None, 224, 323) 1292 conv1d_2[0][0] +__________________________________________________________________________________________________ +max_pooling1d_2 (MaxPooling1D) (None, 112, 323) 0 batch_normalization_2[0][0] +__________________________________________________________________________________________________ +gelu_3 (GELU) (None, 112, 323) 0 max_pooling1d_2[0][0] +__________________________________________________________________________________________________ +conv1d_3 (Conv1D) (None, 112, 363) 586245 gelu_3[0][0] +__________________________________________________________________________________________________ +batch_normalization_3 (BatchNor (None, 112, 363) 1452 conv1d_3[0][0] +__________________________________________________________________________________________________ +max_pooling1d_3 (MaxPooling1D) (None, 56, 363) 0 batch_normalization_3[0][0] +__________________________________________________________________________________________________ +gelu_4 (GELU) (None, 56, 363) 0 max_pooling1d_3[0][0] +__________________________________________________________________________________________________ +conv1d_4 (Conv1D) (None, 56, 407) 738705 gelu_4[0][0] +__________________________________________________________________________________________________ +batch_normalization_4 (BatchNor (None, 56, 407) 1628 conv1d_4[0][0] +__________________________________________________________________________________________________ +max_pooling1d_4 (MaxPooling1D) (None, 28, 407) 0 batch_normalization_4[0][0] +__________________________________________________________________________________________________ +gelu_5 (GELU) (None, 28, 407) 0 max_pooling1d_4[0][0] +__________________________________________________________________________________________________ +conv1d_5 (Conv1D) (None, 28, 456) 927960 gelu_5[0][0] +__________________________________________________________________________________________________ +batch_normalization_5 (BatchNor (None, 28, 456) 1824 conv1d_5[0][0] +__________________________________________________________________________________________________ +max_pooling1d_5 (MaxPooling1D) (None, 14, 456) 0 batch_normalization_5[0][0] +__________________________________________________________________________________________________ +gelu_6 (GELU) (None, 14, 456) 0 max_pooling1d_5[0][0] +__________________________________________________________________________________________________ +conv1d_6 (Conv1D) (None, 14, 512) 1167360 gelu_6[0][0] +__________________________________________________________________________________________________ +batch_normalization_6 (BatchNor (None, 14, 512) 2048 conv1d_6[0][0] +__________________________________________________________________________________________________ +max_pooling1d_6 (MaxPooling1D) (None, 7, 512) 0 batch_normalization_6[0][0] +__________________________________________________________________________________________________ +gelu_7 (GELU) (None, 7, 512) 0 max_pooling1d_6[0][0] +__________________________________________________________________________________________________ +conv1d_7 (Conv1D) (None, 7, 256) 131072 gelu_7[0][0] +__________________________________________________________________________________________________ +batch_normalization_7 (BatchNor (None, 7, 256) 1024 conv1d_7[0][0] +__________________________________________________________________________________________________ +gelu_8 (GELU) (None, 7, 256) 0 batch_normalization_7[0][0] +__________________________________________________________________________________________________ +reshape (Reshape) (None, 1, 1792) 0 gelu_8[0][0] +__________________________________________________________________________________________________ +dense (Dense) (None, 1, 768) 1376256 reshape[0][0] +__________________________________________________________________________________________________ +batch_normalization_8 (BatchNor (None, 1, 768) 3072 dense[0][0] +__________________________________________________________________________________________________ +dropout (Dropout) (None, 1, 768) 0 batch_normalization_8[0][0] +__________________________________________________________________________________________________ +gelu_9 (GELU) (None, 1, 768) 0 dropout[0][0] diff --git a/manuscripts/basset/path.rc b/manuscripts/basset/path.rc deleted file mode 100644 index 0d5d6a06..00000000 --- a/manuscripts/basset/path.rc +++ /dev/null @@ -1 +0,0 @@ -export PATH=$PATH:/home/moody/basset/basenji/bin \ No newline at end of file diff --git a/params_react.json b/params_react.json deleted file mode 100644 index 4d70e5a1..00000000 --- a/params_react.json +++ /dev/null @@ -1,911 +0,0 @@ -[ - { - "function_name": "attention", - "kwargs": "kwargs", - "params": [ - { - "name": "bn_type", - "value": "standard" - }, - { - "name": "bn_momentum", - "value": 0.99 - }, - { - "name": "batch_norm", - "value": false - }, - { - "name": "max_relative_position", - "value": 64 - }, - { - "name": "kq_depth", - "value": null - } - ] - }, - { - "function_name": "average_pooling", - "kwargs": "kwargs", - "params": [ - { - "name": "pool_size", - "value": 2 - } - ] - }, - { - "function_name": "average_to_2d", - "kwargs": "kwargs", - "params": [] - }, - { - "function_name": "center_average", - "kwargs": "kwargs", - "params": [] - }, - { - "function_name": "center_slice", - "kwargs": "kwargs", - "params": [] - }, - { - "function_name": "concat_dist_2d", - "kwargs": "kwargs", - "params": [] - }, - { - "function_name": "concat_position", - "kwargs": "kwargs", - "params": [ - { - "name": "power", - "value": 1 - }, - { - "name": "transform", - "value": "abs" - } - ] - }, - { - "function_name": "concat_to_2d", - "kwargs": "kwargs", - "params": [] - }, - { - "function_name": "conv_block", - "kwargs": null, - "params": [ - { - "name": "padding", - "value": "same" - }, - { - "name": "kernel_initializer", - "value": "he_normal" - }, - { - "name": "bn_type", - "value": "standard" - }, - { - "name": "bn_gamma", - "value": null - }, - { - "name": "bn_momentum", - "value": 0.99 - }, - { - "name": "batch_norm", - "value": false - }, - { - "name": "pool_size", - "value": 1 - }, - { - "name": "residual", - "value": false - }, - { - "name": "conv_type", - "value": "standard" - }, - { - "name": "dropout", - "value": 0 - }, - { - "name": "l2_scale", - "value": 0 - }, - { - "name": "dilation_rate", - "value": 1 - }, - { - "name": "strides", - "value": 1 - }, - { - "name": "activation_end", - "value": null - }, - { - "name": "activation", - "value": "relu" - }, - { - "name": "kernel_size", - "value": 1 - }, - { - "name": "filters", - "value": null - } - ] - }, - { - "function_name": "conv_block_2d", - "kwargs": null, - "params": [ - { - "name": "symmetric", - "value": false - }, - { - "name": "kernel_initializer", - "value": "he_normal" - }, - { - "name": "bn_type", - "value": "standard" - }, - { - "name": "bn_gamma", - "value": "ones" - }, - { - "name": "bn_momentum", - "value": 0.99 - }, - { - "name": "batch_norm", - "value": false - }, - { - "name": "pool_size", - "value": 1 - }, - { - "name": "dropout", - "value": 0 - }, - { - "name": "l2_scale", - "value": 0 - }, - { - "name": "dilation_rate", - "value": 1 - }, - { - "name": "strides", - "value": 1 - }, - { - "name": "kernel_size", - "value": 1 - }, - { - "name": "conv_type", - "value": "standard" - }, - { - "name": "activation", - "value": "relu" - }, - { - "name": "filters", - "value": 128 - } - ] - }, - { - "function_name": "conv_dna", - "kwargs": null, - "params": [ - { - "name": "padding", - "value": "same" - }, - { - "name": "kernel_initializer", - "value": "he_normal" - }, - { - "name": "conv_type", - "value": "standard" - }, - { - "name": "bn_type", - "value": "standard" - }, - { - "name": "bn_gamma", - "value": null - }, - { - "name": "bn_momentum", - "value": 0.99 - }, - { - "name": "batch_norm", - "value": false - }, - { - "name": "pool_size", - "value": 1 - }, - { - "name": "dropout_residual", - "value": 0 - }, - { - "name": "dropout", - "value": 0 - }, - { - "name": "residual", - "value": false - }, - { - "name": "l2_scale", - "value": 0 - }, - { - "name": "strides", - "value": 1 - }, - { - "name": "activation", - "value": "relu" - }, - { - "name": "kernel_size", - "value": 15 - }, - { - "name": "filters", - "value": null - } - ] - }, - { - "function_name": "conv_nac", - "kwargs": null, - "params": [ - { - "name": "padding", - "value": "same" - }, - { - "name": "kernel_initializer", - "value": "he_normal" - }, - { - "name": "bn_type", - "value": "standard" - }, - { - "name": "bn_gamma", - "value": null - }, - { - "name": "bn_momentum", - "value": 0.99 - }, - { - "name": "batch_norm", - "value": false - }, - { - "name": "pool_size", - "value": 1 - }, - { - "name": "residual", - "value": false - }, - { - "name": "conv_type", - "value": "standard" - }, - { - "name": "dropout", - "value": 0 - }, - { - "name": "l2_scale", - "value": 0 - }, - { - "name": "dilation_rate", - "value": 1 - }, - { - "name": "strides", - "value": 1 - }, - { - "name": "activation", - "value": "relu" - }, - { - "name": "kernel_size", - "value": 1 - }, - { - "name": "filters", - "value": null - } - ] - }, - { - "function_name": "conv_tower", - "kwargs": "kwargs", - "params": [ - { - "name": "repeat", - "value": 1 - }, - { - "name": "divisible_by", - "value": 1 - }, - { - "name": "filters_mult", - "value": null - }, - { - "name": "filters_end", - "value": null - } - ] - }, - { - "function_name": "conv_tower_nac", - "kwargs": "kwargs", - "params": [ - { - "name": "repeat", - "value": 1 - }, - { - "name": "divisible_by", - "value": 1 - }, - { - "name": "filters_mult", - "value": null - }, - { - "name": "filters_end", - "value": null - } - ] - }, - { - "function_name": "conv_tower_v1", - "kwargs": "kwargs", - "params": [ - { - "name": "repeat", - "value": 1 - }, - { - "name": "filters_mult", - "value": 1 - } - ] - }, - { - "function_name": "cropping_2d", - "kwargs": "kwargs", - "params": [] - }, - { - "function_name": "dense", - "kwargs": "kwargs", - "params": [ - { - "name": "l1_scale", - "value": 0 - }, - { - "name": "l2_scale", - "value": 0 - }, - { - "name": "kernel_initializer", - "value": "he_normal" - }, - { - "name": "activation", - "value": "linear" - } - ] - }, - { - "function_name": "dense_block", - "kwargs": "kwargs", - "params": [ - { - "name": "kernel_initializer", - "value": "he_normal" - }, - { - "name": "bn_type", - "value": "standard" - }, - { - "name": "bn_gamma", - "value": null - }, - { - "name": "bn_momentum", - "value": 0.99 - }, - { - "name": "batch_norm", - "value": false - }, - { - "name": "residual", - "value": false - }, - { - "name": "l1_scale", - "value": 0 - }, - { - "name": "l2_scale", - "value": 0 - }, - { - "name": "dropout", - "value": 0 - }, - { - "name": "flatten", - "value": false - }, - { - "name": "activation_end", - "value": null - }, - { - "name": "activation", - "value": "relu" - }, - { - "name": "units", - "value": null - } - ] - }, - { - "function_name": "dilated_dense", - "kwargs": "kwargs", - "params": [ - { - "name": "repeat", - "value": 1 - }, - { - "name": "dropout", - "value": 0 - }, - { - "name": "conv_type", - "value": "standard" - }, - { - "name": "rate_mult", - "value": 2 - }, - { - "name": "kernel_size", - "value": 3 - } - ] - }, - { - "function_name": "dilated_residual", - "kwargs": "kwargs", - "params": [ - { - "name": "round", - "value": false - }, - { - "name": "batch_norm", - "value": false - }, - { - "name": "conv_type", - "value": "standard" - }, - { - "name": "repeat", - "value": 1 - }, - { - "name": "dropout", - "value": 0 - }, - { - "name": "rate_mult", - "value": 2 - }, - { - "name": "kernel_size", - "value": 3 - } - ] - }, - { - "function_name": "dilated_residual_2d", - "kwargs": "kwargs", - "params": [ - { - "name": "symmetric", - "value": true - }, - { - "name": "repeat", - "value": 1 - }, - { - "name": "dropout", - "value": 0 - }, - { - "name": "rate_mult", - "value": 2 - }, - { - "name": "kernel_size", - "value": 3 - } - ] - }, - { - "function_name": "dilated_residual_nac", - "kwargs": "kwargs", - "params": [ - { - "name": "repeat", - "value": 1 - }, - { - "name": "dropout", - "value": 0 - }, - { - "name": "rate_mult", - "value": 2 - }, - { - "name": "kernel_size", - "value": 3 - } - ] - }, - { - "function_name": "dot_to_2d", - "kwargs": "kwargs", - "params": [] - }, - { - "function_name": "exp", - "kwargs": "kwargs", - "params": [ - { - "name": "minus", - "value": null - }, - { - "name": "base", - "value": null - } - ] - }, - { - "function_name": "factor_inverse", - "kwargs": "kwargs", - "params": [] - }, - { - "function_name": "final", - "kwargs": "kwargs", - "params": [ - { - "name": "l1_scale", - "value": 0 - }, - { - "name": "l2_scale", - "value": 0 - }, - { - "name": "kernel_initializer", - "value": "he_normal" - }, - { - "name": "flatten", - "value": false - }, - { - "name": "activation", - "value": "linear" - } - ] - }, - { - "function_name": "geodot_to_2d", - "kwargs": "kwargs", - "params": [] - }, - { - "function_name": "global_context", - "kwargs": "kwargs", - "params": [] - }, - { - "function_name": "max_to_2d", - "kwargs": "kwargs", - "params": [] - }, - { - "function_name": "multihead_attention", - "kwargs": "kwargs", - "params": [ - { - "name": "dense_expansion", - "value": 0 - }, - { - "name": "dropout", - "value": 0 - }, - { - "name": "position_dropout", - "value": 0 - }, - { - "name": "attention_dropout", - "value": 0 - }, - { - "name": "bn_momentum", - "value": 0.9 - }, - { - "name": "activation", - "value": "relu" - }, - { - "name": "num_position_features", - "value": null - }, - { - "name": "out_size", - "value": null - }, - { - "name": "heads", - "value": 1 - }, - { - "name": "key_size", - "value": null - } - ] - }, - { - "function_name": "one_to_two", - "kwargs": "kwargs", - "params": [ - { - "name": "operation", - "value": "mean" - } - ] - }, - { - "function_name": "position_encoding", - "kwargs": null, - "params": [ - { - "name": "min_rate", - "value": 0.0001 - } - ] - }, - { - "function_name": "res_tower", - "kwargs": "kwargs", - "params": [ - { - "name": "num_convs", - "value": 2 - }, - { - "name": "repeat", - "value": 1 - }, - { - "name": "divisible_by", - "value": 1 - }, - { - "name": "pool_size", - "value": 2 - }, - { - "name": "dropout", - "value": 0 - }, - { - "name": "kernel_size", - "value": 1 - }, - { - "name": "filters_mult", - "value": null - }, - { - "name": "filters_end", - "value": null - } - ] - }, - { - "function_name": "squeeze_excite", - "kwargs": "kwargs", - "params": [ - { - "name": "bn_momentum", - "value": 0.9 - }, - { - "name": "batch_norm", - "value": false - }, - { - "name": "additive", - "value": false - }, - { - "name": "bottleneck_ratio", - "value": 8 - }, - { - "name": "activation", - "value": "relu" - } - ] - }, - { - "function_name": "symmetrize_2d", - "kwargs": "kwargs", - "params": [] - }, - { - "function_name": "transformer", - "kwargs": "kwargs", - "params": [ - { - "name": "dropout", - "value": 0.25 - }, - { - "name": "position_dropout", - "value": 0.01 - }, - { - "name": "attention_dropout", - "value": 0.05 - }, - { - "name": "dense_expansion", - "value": 2.0 - }, - { - "name": "activation", - "value": "relu" - }, - { - "name": "num_position_features", - "value": null - }, - { - "name": "out_size", - "value": null - }, - { - "name": "heads", - "value": 1 - }, - { - "name": "key_size", - "value": null - } - ] - }, - { - "function_name": "transformer_tower", - "kwargs": "kwargs", - "params": [ - { - "name": "repeat", - "value": 2 - } - ] - }, - { - "function_name": "upper_tri", - "kwargs": "kwargs", - "params": [ - { - "name": "diagonal_offset", - "value": 2 - } - ] - }, - { - "function_name": "wheeze_excite", - "kwargs": "kwargs", - "params": [] - }, - { - "function_name": "xception_block", - "kwargs": "kwargs", - "params": [ - { - "name": "pool_size", - "value": 2 - }, - { - "name": "dropout", - "value": 0 - }, - { - "name": "kernel_size", - "value": 1 - }, - { - "name": "filters", - "value": null - } - ] - }, - { - "function_name": "xception_tower", - "kwargs": "kwargs", - "params": [ - { - "name": "repeat", - "value": 1 - }, - { - "name": "filters_mult", - "value": 1 - } - ] - } -] \ No newline at end of file From 96f7d72c8ea16b05725dad147bafedf1a0df6854 Mon Sep 17 00:00:00 2001 From: Moody Rahman Date: Mon, 23 Aug 2021 20:32:11 -0400 Subject: [PATCH 22/22] reverted .gitignore --- .gitignore | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index e7f5eb86..6418acf3 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,4 @@ basenji.egg-info/ */.DS_Store */._.DS_Store **/.ipynb_checkpoints/ -data/hg19.fa* -data/* -manuscripts/basset/model_basset \ No newline at end of file +data/hg19.fa* \ No newline at end of file