add transfer modules #40

Open

wants to merge 30 commits into base: main
Changes shown are from 17 of the 30 commits.

Commits:
337b053  transfer_learn (Sep 19, 2023)
6e79986  create_model_with_adapter_by_specify_json (Sep 20, 2023)
ae31d2f  lora (Sep 21, 2023)
0ba9122  update lora ia3 (Oct 3, 2023)
c5db687  pull from main, keep my own setup.cfg (Oct 3, 2023)
9a45989  implement lora/ia3 merge weight (Nov 11, 2023)
9db2136  implement lora/ia3 merge weight (Nov 11, 2023)
6502e52  add gene eval (hy395, Jan 29, 2024)
9fbfad3  merge from main 1.29.24 (hy395, Jan 29, 2024)
21271c9  add --f16 for float16 inference (hy395, Apr 24, 2024)
e1be2c8  make the model take variable size input (hy395, May 14, 2024)
cdf3634  fix ia3 (hy395, Jun 18, 2024)
c5b6724  add se_adapter and locon (hy395, Jul 3, 2024)
cd38eee  dont specify tf version (hy395, Jul 3, 2024)
e39e65f  change back tf version (hy395, Jul 3, 2024)
4e7aa5b  Merge remote-tracking branch 'origin/main' into transfer (hy395, Jul 8, 2024)
061ad81  add log_dir argument (hy395, Sep 4, 2024)
09cad18  untrack borzoi_test_gene.py (hy395, Oct 4, 2024)
d500a26  move transfer param to json (hy395, Oct 4, 2024)
101346c  add transfer tutorial (hy395, Oct 14, 2024)
1963ce3  add transfer tutorial (hy395, Oct 14, 2024)
9559c76  move transfer.py out of helper (hy395, Oct 18, 2024)
902ad29  black format (davek44, Oct 27, 2024)
ebbb789  setting aside nfs-dependent tests (davek44, Oct 27, 2024)
6672d05  make gpumemorycallback cpu compatible (hy395, Nov 5, 2024)
aa4e358  Untrack tests/test_transfer (hy395, Nov 5, 2024)
527ba32  Untrack tests/test_transfer (hy395, Nov 5, 2024)
44cf223  black (hy395, Nov 5, 2024)
9132807  fix bug on json param (hy395, Dec 20, 2024)
a636082  update tutorial (hy395, Dec 20, 2024)
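
Several commits above (lora, update lora ia3, implement lora/ia3 merge weight) add low-rank adapters and a way to fold the trained updates back into the base weights. As a reference point for what "merge weight" usually means, here is a minimal sketch under the standard LoRA formulation W' = W + alpha * (A @ B); this illustrates the general technique only and is not the PR's actual code, whose conventions (factor shapes, scaling) may differ:

```python
import numpy as np

def merge_lora(W, A, B, alpha=1.0):
    """Fold a trained low-rank update into the frozen base kernel.

    W: (d_in, d_out) base kernel; A: (d_in, r) down-projection;
    B: (r, d_out) up-projection. After merging, the adapter can be
    discarded and inference runs at the original model's cost.
    """
    return W + alpha * (A @ B)

def merge_ia3(W, ell):
    """IA3 merge: the learned per-output-channel scaling vector ell
    is folded in by rescaling the corresponding kernel columns."""
    return W * ell  # ell: (d_out,), broadcasts over rows

# Toy check: a zero-initialized up-projection makes the merge a no-op,
# which is why LoRA starts out as an identity perturbation.
rng = np.random.default_rng(0)
W = rng.normal(size=(8, 4))
A = rng.normal(size=(8, 2))
B = np.zeros((2, 4))
assert np.allclose(merge_lora(W, A, B), W)
```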
src/baskerville/blocks.py — 31 changes: 26 additions & 5 deletions
@@ -195,7 +195,7 @@ def conv_dna(
         kernel_initializer=kernel_initializer,
         kernel_regularizer=tf.keras.regularizers.l2(l2_scale),
     )(current)

     # squeeze-excite
     if se:
         current = squeeze_excite(current)
@@ -1109,6 +1109,9 @@ def transformer(
     qkv_width=1,
     mha_initializer="he_normal",
     kernel_initializer="he_normal",
+    adapter=None,
+    latent=16,
+    seqlen_train=None,
     **kwargs,
 ):
     """Construct a transformer block.
@@ -1140,20 +1143,25 @@
         initializer=mha_initializer,
         l2_scale=mha_l2_scale,
         qkv_width=qkv_width,
+        seqlen_train=seqlen_train
     )(current)

     # dropout
     if dropout > 0:
         current = tf.keras.layers.Dropout(dropout)(current)

+    # add houlsby-adapter
+    if adapter=='houlsby':
+        current = layers.AdapterHoulsby(latent_size=latent)(current)
+
     # residual
     current = tf.keras.layers.Add()([inputs, current])

     if dense_expansion == 0:
         final = current
     else:
         final = transformer_dense(
-            current, out_size, dense_expansion, l2_scale, dropout, kernel_initializer
+            current, out_size, dense_expansion, l2_scale, dropout, kernel_initializer, adapter, latent
         )

     return final
@@ -1265,7 +1273,8 @@ def transformer_split(


 def transformer_dense(
-    inputs, out_size, dense_expansion, l2_scale, dropout, kernel_initializer
+    inputs, out_size, dense_expansion, l2_scale, dropout, kernel_initializer,
+    adapter=None, latent=16
 ):
     """Transformer block dense portion."""
     # layer norm
@@ -1297,6 +1306,9 @@ def transformer_dense(
     if dropout > 0:
         current = tf.keras.layers.Dropout(dropout)(current)

+    if adapter=='houlsby':
+        current = layers.AdapterHoulsby(latent_size=latent)(current)
+
     # residual
     final = tf.keras.layers.Add()([inputs, current])

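The two hunks above wire an optional Houlsby adapter into both halves of the transformer block: after the post-attention dropout and after the dense portion's dropout, in each case just before the residual add. For orientation, here is a minimal sketch of what a bottleneck adapter like layers.AdapterHoulsby plausibly does, following Houlsby et al. (2019); the actual layer (presumably defined in baskerville's layers module) is not shown in this diff and may differ in details:

```python
import tensorflow as tf

class AdapterHoulsbySketch(tf.keras.layers.Layer):
    """Bottleneck adapter: x + up(nonlinearity(down(x)))."""

    def __init__(self, latent_size=16, **kwargs):
        super().__init__(**kwargs)
        self.latent_size = latent_size

    def build(self, input_shape):
        channels = input_shape[-1]
        # project down to a small latent, apply a nonlinearity, project
        # back up; zero-initializing the up-projection makes the adapter
        # start as an identity map, preserving pretrained behavior
        self.down = tf.keras.layers.Dense(self.latent_size, activation="gelu")
        self.up = tf.keras.layers.Dense(channels, kernel_initializer="zeros")

    def call(self, inputs):
        return inputs + self.up(self.down(inputs))
```

During transfer, typically only these small adapters (plus layer norms and the output head) are trained while the backbone stays frozen, which is what makes this cheaper than full fine-tuning.
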
@@ -1439,11 +1451,20 @@ def squeeze_excite(
     additive=False,
     norm_type=None,
     bn_momentum=0.9,
+    kernel_initializer='glorot_uniform',
+    use_bias=True,
+    scale_fun='sigmoid',
     **kwargs,
 ):
     return layers.SqueezeExcite(
-        activation, additive, bottleneck_ratio, norm_type, bn_momentum
-    )(inputs)
+        activation=activation,
+        additive=additive,
+        bottleneck_ratio=bottleneck_ratio,
+        norm_type=norm_type,
+        bn_momentum=bn_momentum,
+        kernel_initializer=kernel_initializer,
+        scale_fun=scale_fun,
+        use_bias=use_bias)(inputs)


 def wheeze_excite(inputs, pool_size, **kwargs):
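
The squeeze_excite wrapper change is a small but useful cleanup: passing every option to layers.SqueezeExcite by keyword prevents silent argument misbinding as new parameters (kernel_initializer, use_bias, scale_fun) are inserted, and it exposes the gate's scaling function as a configuration choice. As a generic reference, a standard squeeze-excite block over sequence inputs looks roughly like the sketch below; this illustrates the technique only and is not baskerville's SqueezeExcite layer, which additionally supports additive gating and normalization options:

```python
import tensorflow as tf

def squeeze_excite_sketch(x, bottleneck_ratio=8, scale_fun="sigmoid",
                          kernel_initializer="glorot_uniform", use_bias=True):
    """Standard SE block for (batch, length, channels) inputs."""
    channels = x.shape[-1]
    # squeeze: global average over the length axis
    s = tf.keras.layers.GlobalAveragePooling1D()(x)
    # excite: bottleneck MLP ending in a per-channel gate
    s = tf.keras.layers.Dense(channels // bottleneck_ratio, activation="relu",
                              kernel_initializer=kernel_initializer,
                              use_bias=use_bias)(s)
    s = tf.keras.layers.Dense(channels, activation=scale_fun,
                              kernel_initializer=kernel_initializer,
                              use_bias=use_bias)(s)
    # rescale: broadcast the gate across sequence positions
    s = tf.keras.layers.Reshape((1, channels))(s)
    return tf.keras.layers.Multiply()([x, s])
```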