Use existing multi-head attention layer
CyberZHG committed Nov 7, 2018
1 parent c1dd746 commit 27af8c4
Showing 11 changed files with 17 additions and 327 deletions.
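This commit drops the in-repo attention implementation (keras_bert/layers/multi_head.py and the keras-self-attention dependency) in favor of the standalone keras-multi-head package. A minimal usage sketch of the external layer, mirroring the updated tests below (the input shape and head count are illustrative, not from this commit):

```python
import keras
from keras_multi_head import MultiHeadAttention
from keras_bert.activations import gelu

# Sketch only: the feature dimension (24) must be divisible by head_num (3).
input_layer = keras.layers.Input(shape=(None, 24), name='Input')
att_layer = MultiHeadAttention(
    head_num=3,
    kernel_activation=gelu,  # replaces the removed dropout_rate argument
    name='Multi-Head',
)(input_layer)
model = keras.models.Model(inputs=input_layer, outputs=att_layer)
model.compile(optimizer='adam', loss='mse')
```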
5 changes: 2 additions & 3 deletions keras_bert/bert.py
@@ -1,8 +1,8 @@
 import random
 import keras
 import numpy as np
-from keras_self_attention import ScaledDotProductAttention
-from .layers import (get_inputs, Embeddings, Transformer, MultiHeadAttention,
+from keras_multi_head import MultiHeadAttention
+from .layers import (get_inputs, Embeddings, Transformer,
                      FeedForward, Masked, Extract, LayerNormalization)
 from .activations import gelu

@@ -98,7 +98,6 @@ def get_custom_objects():
     """Get all custom objects for loading saved models."""
     return {
         'Embeddings': Embeddings,
-        'ScaledDotProductAttention': ScaledDotProductAttention,
         'MultiHeadAttention': MultiHeadAttention,
         'FeedForward': FeedForward,
         'LayerNormalization': LayerNormalization,
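With the external class registered in get_custom_objects(), previously saved models keep loading; a minimal sketch ('bert_model.h5' is a placeholder path, not from this commit):

```python
import keras
from keras_bert.bert import get_custom_objects

# custom_objects now maps 'MultiHeadAttention' to the keras_multi_head class.
model = keras.models.load_model('bert_model.h5',
                                custom_objects=get_custom_objects())
```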
1 change: 0 additions & 1 deletion keras_bert/layers/__init__.py
@@ -1,7 +1,6 @@
 from .wrapper import Wrapper
 from .inputs import get_inputs
 from .embedding import Embeddings
-from .multi_head import MultiHeadAttention
 from .feed_forward import FeedForward
 from .layer_norm import LayerNormalization
 from .transformer import Transformer
141 changes: 0 additions & 141 deletions keras_bert/layers/multi_head.py

This file was deleted.

5 changes: 3 additions & 2 deletions keras_bert/layers/transformer.py
@@ -1,8 +1,9 @@
 import keras
-from .multi_head import MultiHeadAttention
+from keras_multi_head import MultiHeadAttention
 from .layer_norm import LayerNormalization
 from .feed_forward import FeedForward
 from .wrapper import Wrapper
+from ..activations import gelu


 class Transformer(Wrapper):
@@ -46,8 +47,8 @@ def compute_mask(self, inputs, input_mask=None):
     def build(self, input_shape):
         layer = MultiHeadAttention(
             head_num=self.head_num,
-            dropout_rate=self.dropout_rate,
             trainable=self.trainable,
+            kernel_activation=gelu,
             name='%s-MultiHead' % self.name,
         )
         self.layers[layer.name] = layer
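Note the argument swap in Transformer.build: the external layer takes kernel_activation (gelu here) where the deleted in-repo layer took dropout_rate, so dropout is no longer configured on the attention sub-layer itself. A standalone sketch of the new construction (the head count and name are illustrative, not from this commit):

```python
from keras_multi_head import MultiHeadAttention
from keras_bert.activations import gelu

# gelu is applied inside the attention projections via kernel_activation;
# head_num=4 and the layer name are illustrative placeholders.
attention = MultiHeadAttention(
    head_num=4,
    trainable=True,
    kernel_activation=gelu,
    name='Encoder-1-MultiHead',
)
```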
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,4 +1,4 @@
 numpy
 tensorflow
 Keras
-keras-self-attention==0.30.0
+keras-multi-head==0.7.0
3 changes: 2 additions & 1 deletion setup.py
@@ -2,7 +2,7 @@

 setup(
     name='keras-bert',
-    version='0.11.0',
+    version='0.13.0',
     packages=find_packages(),
     url='https://github.com/CyberZHG/keras-bert',
     license='MIT',
@@ -13,6 +13,7 @@
     install_requires=[
         'numpy',
         'keras',
+        'keras-multi-head==0.7.0',
     ],
     classifiers=(
         "Programming Language :: Python :: 2.7",
10 changes: 6 additions & 4 deletions tests/layers/test_layer_norm.py
@@ -1,7 +1,9 @@
 import unittest
 import keras
 import numpy as np
-from keras_bert.layers import LayerNormalization, MultiHeadAttention
+from keras_multi_head import MultiHeadAttention
+from keras_bert.activations import gelu
+from keras_bert.layers import LayerNormalization


 class TestLayerNorm(unittest.TestCase):
@@ -42,8 +44,8 @@ def test_fit(self):
         )
         att_layer = MultiHeadAttention(
             head_num=3,
-            dropout_rate=1e-5,
-            name='MH'
+            kernel_activation=gelu,
+            name='Multi-Head-Attentions'
         )(input_layer)
         dense_layer = keras.layers.Dense(units=3, name='Dense-1')(att_layer)
         norm_layer = LayerNormalization(
@@ -58,7 +60,7 @@ def test_fit(self):
         model.compile(
             optimizer=keras.optimizers.Adam(lr=1e-3),
             loss='mse',
-            metrics=['mse'],
+            metrics={},
         )
         model.summary()

84 changes: 1 addition & 83 deletions tests/layers/test_masked.py
@@ -1,5 +1,4 @@
 import unittest
-import random
 import keras
 import numpy as np
 from keras_bert.layers import get_inputs, Embeddings, Transformer, Masked
@@ -15,7 +14,7 @@ def test_sample(self):
         model.compile(
             optimizer='adam',
             loss='mse',
-            metrics=['mse'],
+            metrics={},
         )
         model.summary(line_length=120)
         model.predict([
@@ -81,84 +80,3 @@ def test_mask_result(self):
             [0, 1, 0, 1, 0, 0, 0, 0, 0, 0],
         ])
         self.assertTrue(np.allclose(expect, predicts[1]))
-
-    def test_fit(self):
-        input_layer = keras.layers.Input(
-            shape=(15,),
-            name='Input',
-        )
-        embed_layer = keras.layers.Embedding(
-            input_dim=12,
-            output_dim=24,
-            mask_zero=True,
-            name='Embedding',
-        )(input_layer)
-        rnn_layer = keras.layers.Bidirectional(
-            keras.layers.LSTM(units=100, return_sequences=True),
-            name='Bi-LSTM',
-        )(embed_layer)
-        dense_layer = keras.layers.Dense(
-            units=12,
-            activation='softmax',
-            name='Dense',
-        )(rnn_layer)
-        mask_layer = keras.layers.Input(
-            shape=(None,),
-            name='Mask',
-        )
-        masked_layer = Masked(
-            name='Masked',
-        )([dense_layer, mask_layer])
-        model = keras.models.Model(
-            inputs=[input_layer, mask_layer],
-            outputs=masked_layer,
-        )
-        model.compile(
-            optimizer=keras.optimizers.Adam(lr=1e-4),
-            loss=keras.losses.sparse_categorical_crossentropy,
-            metrics=[keras.metrics.sparse_categorical_crossentropy],
-        )
-        model.summary(line_length=150)
-
-        def _generator(batch_size=32):
-            while True:
-                inputs, masked, outputs = [], [], []
-                for _ in range(batch_size):
-                    inputs.append([])
-                    masked.append([])
-                    outputs.append([])
-                    has_mask = False
-                    for i in range(1, 11):
-                        inputs[-1].append(i)
-                        outputs[-1].append([i])
-                        if random.random() < 0.3:
-                            has_mask = True
-                            inputs[-1][-1] = 11
-                            masked[-1].append(1)
-                        else:
-                            masked[-1].append(0)
-                    if not has_mask:
-                        masked[-1][0] = 1
-                    inputs[-1] += [0] * (15 - len(inputs[-1]))
-                    masked[-1] += [0] * (15 - len(masked[-1]))
-                    outputs[-1] += [[0]] * (15 - len(outputs[-1]))
-                yield [np.asarray(inputs), np.asarray(masked)], np.asarray(outputs)
-
-        model.fit_generator(
-            generator=_generator(),
-            steps_per_epoch=1000,
-            epochs=10,
-            validation_data=_generator(),
-            validation_steps=100,
-            callbacks=[
-                keras.callbacks.EarlyStopping(monitor='val_loss', patience=2)
-            ],
-        )
-        for inputs, outputs in _generator(batch_size=32):
-            predicts = model.predict(inputs)
-            actual = np.argmax(predicts, axis=-1)
-            for i in range(32):
-                for j in range(15):
-                    if inputs[1][i][j]:
-                        self.assertEqual(j + 1, actual[i][j])
-            break