You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
{{ message }}
This repository has been archived by the owner on Jan 24, 2024. It is now read-only.
C++ Callstacks:
DataType of Paddle Op sequence_expand Y must be the same. Get (float) != (int64_t) at [/paddle/paddle/fluid/framework/operator.cc:1115]
from future import print_function
import os
import six
import numpy as np
import paddle
import paddle.fluid as fluid
for data in test_data():
src_word_id = fluid.create_lod_tensor(
data=[x[0] for x in data],
recursive_seq_lens=[[len(x[0]) for x in data]],
place=place)
init_ids = fluid.create_lod_tensor(
data=np.array([[0]] * len(data), dtype='int64'),
recursive_seq_lens=[[1] * len(data)] * 2,
place=place)
init_scores = fluid.create_lod_tensor(
data=np.array([[0.]] * len(data), dtype='float32'),
recursive_seq_lens=[[1] * len(data)] * 2,
place=place)
seq_ids, seq_scores = exe.run(
infer_prog,
feed={
'src_word_id': src_word_id,
'init_ids': init_ids,
'init_scores': init_scores
},
fetch_list=[translation_ids, translation_scores],
return_numpy=False)
# How to parse the results:
# Suppose the lod of seq_ids is:
# [[0, 3, 6], [0, 12, 24, 40, 54, 67, 82]]
# then from lod[0]:
# there are 2 source sentences, beam width is 3.
# from lod[1]:
# the first source sentence has 3 hyps; the lengths are 12, 12, 16
# the second source sentence has 3 hyps; the lengths are 14, 13, 15
hyps = [[] for i in range(len(seq_ids.lod()[0]) - 1)]
scores = [[] for i in range(len(seq_scores.lod()[0]) - 1)]
for i in range(len(seq_ids.lod()[0]) - 1): # for each source sentence
start = seq_ids.lod()[0][i]
end = seq_ids.lod()[0][i + 1]
print("Original sentence:")
print(" ".join([src_idx2word[idx] for idx in data[i][0][1:-1]]))
print("Translated score and sentence:")
for j in range(end - start): # for each candidate
sub_start = seq_ids.lod()[1][start + j]
sub_end = seq_ids.lod()[1][start + j + 1]
hyps[i].append(" ".join([
trg_idx2word[idx]
for idx in np.array(seq_ids)[sub_start:sub_end][1:-1]
]))
scores[i].append(np.array(seq_scores)[sub_end - 1])
print(scores[i][-1], hyps[i][-1].encode('utf8'))
def main(use_cuda):
train(use_cuda)
#infer(use_cuda)
if name == 'main':
use_cuda = False # set to True if training with GPU
main(use_cuda)
#infer(use_cuda)
The text was updated successfully, but these errors were encountered:
代码使用train.py中的代码,但是数据集是自己的,训练完成了,但是infer时报错:
EnforceNotMet: Invoke operator sequence_expand error.
C++ Callstacks:
DataType of Paddle Op sequence_expand Y must be the same. Get (float) != (int64_t) at [/paddle/paddle/fluid/framework/operator.cc:1115]
from future import print_function
import os
import six
import numpy as np
import paddle
import paddle.fluid as fluid
dict_size = 29364
source_dict_size = target_dict_size = dict_size
word_dim = 512
hidden_dim = 512
decoder_size = hidden_dim
max_length = 256
beam_size = 4
batch_size = 64
is_sparse = True
model_save_dir = "1_machine_translation.inference.model"
def encoder():
src_word_id = fluid.layers.data(
name="src_word_id", shape=[1], dtype='int64', lod_level=1)
src_embedding = fluid.layers.embedding(
input=src_word_id,
size=[source_dict_size, word_dim],
dtype='float32',
is_sparse=is_sparse,param_attr='shared_w')
fc_forward = fluid.layers.fc(
input=src_embedding, size=hidden_dim * 3, bias_attr=False)
src_forward = fluid.layers.dynamic_gru(input=fc_forward, size=hidden_dim)
fc_backward = fluid.layers.fc(
input=src_embedding, size=hidden_dim * 3, bias_attr=False)
src_backward = fluid.layers.dynamic_gru(
input=fc_backward, size=hidden_dim, is_reverse=True)
encoded_vector = fluid.layers.concat(
input=[src_forward, src_backward], axis=1)
return encoded_vector
def cell(x, hidden, encoder_out, encoder_out_proj):
def simple_attention(encoder_vec, encoder_proj, decoder_state):
decoder_state_proj = fluid.layers.fc(
input=decoder_state, size=decoder_size, bias_attr=False)
decoder_state_expand = fluid.layers.sequence_expand(
x=decoder_state_proj, y=encoder_proj)
mixed_state = fluid.layers.elementwise_add(encoder_proj,
decoder_state_expand)
attention_weights = fluid.layers.fc(
input=mixed_state, size=1, bias_attr=False)
attention_weights = fluid.layers.sequence_softmax(
input=attention_weights)
weigths_reshape = fluid.layers.reshape(x=attention_weights, shape=[-1])
scaled = fluid.layers.elementwise_mul(
x=encoder_vec, y=weigths_reshape, axis=0)
context = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
return context
context = simple_attention(encoder_out, encoder_out_proj, hidden)
out = fluid.layers.fc(
input=[x, context], size=decoder_size * 3, bias_attr=False)
out = fluid.layers.gru_unit(
input=out, hidden=hidden, size=decoder_size * 3)[0]
return out, out
def train_decoder(encoder_out):
encoder_last = fluid.layers.sequence_last_step(input=encoder_out)
encoder_last_proj = fluid.layers.fc(
input=encoder_last, size=decoder_size, act='tanh')
cache the encoder_out's computed result in attention
encoder_out_proj = fluid.layers.fc(
input=encoder_out, size=decoder_size, bias_attr=False)
trg_language_word = fluid.layers.data(
name="target_language_word", shape=[1], dtype='int64', lod_level=1)
trg_embedding = fluid.layers.embedding(
input=trg_language_word,
size=[target_dict_size, word_dim],
dtype='float32',
is_sparse=is_sparse,param_attr='shared_w')
rnn = fluid.layers.DynamicRNN()
with rnn.block():
x = rnn.step_input(trg_embedding)
pre_state = rnn.memory(init=encoder_last_proj, need_reorder=True)
encoder_out = rnn.static_input(encoder_out)
encoder_out_proj = rnn.static_input(encoder_out_proj)
out, current_state = cell(x, pre_state, encoder_out, encoder_out_proj)
prob = fluid.layers.fc(input=out, size=target_dict_size, act='softmax')
return rnn()
def train_model():
encoder_out = encoder()
rnn_out = train_decoder(encoder_out)
label = fluid.layers.data(
name="target_language_next_word", shape=[1], dtype='int64', lod_level=1)
cost = fluid.layers.cross_entropy(input=rnn_out, label=label)
avg_cost = fluid.layers.mean(cost)
return avg_cost
def optimizer_func():
fluid.clip.set_gradient_clip(
clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0))
lr_decay = fluid.layers.learning_rate_scheduler.noam_decay(hidden_dim, 1000)
return fluid.optimizer.Adam(
learning_rate=lr_decay,
regularization=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4))
def train(use_cuda):
train_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(train_prog, startup_prog):
with fluid.unique_name.guard():
avg_cost = train_model()
optimizer = optimizer_func()
optimizer.minimize(avg_cost)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
#fluid.io.load_params(exe, model_save_dir, main_program=train_prog)
train_data = paddle.batch(
paddle.reader.shuffle(
train_reader1, buf_size=100000),
batch_size=batch_size)
feeder = fluid.DataFeeder(
feed_list=[
'src_word_id', 'target_language_word', 'target_language_next_word'
],
place=place,
program=train_prog)
exe.run(startup_prog)
EPOCH_NUM = 20
for pass_id in six.moves.xrange(EPOCH_NUM):
batch_id = 0
for data in train_data():
cost = exe.run(
train_prog, feed=feeder.feed(data), fetch_list=[avg_cost])[0]
print('pass_id: %d, batch_id: %d, loss: %f' % (pass_id, batch_id,
cost))
batch_id += 1
fluid.io.save_params(exe, model_save_dir, main_program=train_prog)
def infer_decoder(encoder_out):
encoder_last = fluid.layers.sequence_last_step(input=encoder_out)
encoder_last_proj = fluid.layers.fc(
input=encoder_last, size=decoder_size, act='tanh')
encoder_out_proj = fluid.layers.fc(
input=encoder_out, size=decoder_size, bias_attr=False)
max_len = fluid.layers.fill_constant(
shape=[1], dtype='int64', value=max_length)
counter = fluid.layers.zeros(shape=[1], dtype='int64', force_cpu=True)
init_ids = fluid.layers.data(
name="init_ids", shape=[1], dtype="int64", lod_level=2)
init_scores = fluid.layers.data(
name="init_scores", shape=[1], dtype="float32", lod_level=2)
create and init arrays to save selected ids, scores and states for each step
ids_array = fluid.layers.array_write(init_ids, i=counter)
scores_array = fluid.layers.array_write(init_scores, i=counter)
state_array = fluid.layers.array_write(encoder_last_proj, i=counter)
cond = fluid.layers.less_than(x=counter, y=max_len)
while_op = fluid.layers.While(cond=cond)
with while_op.block():
pre_ids = fluid.layers.array_read(array=ids_array, i=counter)
pre_score = fluid.layers.array_read(array=scores_array, i=counter)
pre_state = fluid.layers.array_read(array=state_array, i=counter)
translation_ids, translation_scores = fluid.layers.beam_search_decode(
ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=1)
return translation_ids, translation_scores
def infer_model():
encoder_out = encoder()
translation_ids, translation_scores = infer_decoder(encoder_out)
return translation_ids, translation_scores
def infer(use_cuda):
infer_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(infer_prog, startup_prog):
with fluid.unique_name.guard():
translation_ids, translation_scores = infer_model()
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
test_data = paddle.batch(
test_reader,
batch_size=batch_size)
src_idx2word = reverse_vocab
trg_idx2word = reverse_vocab
fluid.io.load_params(exe, model_save_dir, main_program=infer_prog)
for data in test_data():
src_word_id = fluid.create_lod_tensor(
data=[x[0] for x in data],
recursive_seq_lens=[[len(x[0]) for x in data]],
place=place)
init_ids = fluid.create_lod_tensor(
data=np.array([[0]] * len(data), dtype='int64'),
recursive_seq_lens=[[1] * len(data)] * 2,
place=place)
init_scores = fluid.create_lod_tensor(
data=np.array([[0.]] * len(data), dtype='float32'),
recursive_seq_lens=[[1] * len(data)] * 2,
place=place)
seq_ids, seq_scores = exe.run(
infer_prog,
feed={
'src_word_id': src_word_id,
'init_ids': init_ids,
'init_scores': init_scores
},
fetch_list=[translation_ids, translation_scores],
return_numpy=False)
# How to parse the results:
# Suppose the lod of seq_ids is:
# [[0, 3, 6], [0, 12, 24, 40, 54, 67, 82]]
# then from lod[0]:
# there are 2 source sentences, beam width is 3.
# from lod[1]:
# the first source sentence has 3 hyps; the lengths are 12, 12, 16
# the second source sentence has 3 hyps; the lengths are 14, 13, 15
hyps = [[] for i in range(len(seq_ids.lod()[0]) - 1)]
scores = [[] for i in range(len(seq_scores.lod()[0]) - 1)]
for i in range(len(seq_ids.lod()[0]) - 1): # for each source sentence
start = seq_ids.lod()[0][i]
end = seq_ids.lod()[0][i + 1]
print("Original sentence:")
print(" ".join([src_idx2word[idx] for idx in data[i][0][1:-1]]))
print("Translated score and sentence:")
for j in range(end - start): # for each candidate
sub_start = seq_ids.lod()[1][start + j]
sub_end = seq_ids.lod()[1][start + j + 1]
hyps[i].append(" ".join([
trg_idx2word[idx]
for idx in np.array(seq_ids)[sub_start:sub_end][1:-1]
]))
scores[i].append(np.array(seq_scores)[sub_end - 1])
print(scores[i][-1], hyps[i][-1].encode('utf8'))
def main(use_cuda):
train(use_cuda)
#infer(use_cuda)
if name == 'main':
use_cuda = False # set to True if training with GPU
main(use_cuda)
#infer(use_cuda)
The text was updated successfully, but these errors were encountered: