Use specified GPU in tagger and parser (fix #30)
hankcs committed Nov 26, 2018
1 parent 7eaf3f7 commit c8fc984
Showing 10 changed files with 161 additions and 147 deletions.
7 changes: 5 additions & 2 deletions elit/embedding.py
@@ -20,9 +20,11 @@
import abc
import fastText
import numpy as np
import mxnet as mx
from gensim.models import KeyedVectors

from elit.nlp.language_models.contextual_string_model import ContextualStringModel
from elit.nlp.tagger.mxnet_util import mxnet_prefer_gpu
from elit.structure import Document

__author__ = 'Jinho D. Choi'
@@ -160,13 +162,14 @@ class ContextualStringEmbedding(Embedding):
:class:`ContextualStringEmbedding` is the context-based model proposed by `Akbik et al., 2018 <http://aclweb.org/anthology/C18-1139>`_.
"""

def __init__(self, model_path: str, detach: bool = True):
def __init__(self, model_path: str, detach: bool = True, context: mx.Context = None):
"""
:param model_path: the path to the model file.
:param detach: if `False`, the gradient will propagate into the language model,
which dramatically slows down training and often leads to worse results.
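:param context: the MXNet device to run the language model on; defaults to mxnet_prefer_gpu() when not given.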
"""
self.lm = ContextualStringModel.load_language_model(model_path)
self.context = context if context else mxnet_prefer_gpu()
self.lm = ContextualStringModel.load_language_model(model_path, self.context)
super().__init__(self.lm.embedding_size)

self.detach = detach
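A minimal usage sketch for the new context argument of ContextualStringEmbedding, based on the signature changed above; the model path and the mx.gpu(0) device are illustrative, and omitting context falls back to mxnet_prefer_gpu():

import mxnet as mx
from elit.embedding import ContextualStringEmbedding

# Load the contextual string (language model) embedding on an explicit device;
# 'data/model/lm-news-forward' is a placeholder path to a saved ContextualStringModel.
emb = ContextualStringEmbedding('data/model/lm-news-forward', context=mx.gpu(0))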
13 changes: 7 additions & 6 deletions elit/nlp/dep/parser/dep_parser.py
@@ -35,12 +35,13 @@


class DepParser(object):
def __init__(self, config_file_path, extra_args=None) -> None:
def __init__(self, config_file_path, context: mx.Context = None, extra_args=None) -> None:
super().__init__()
np.random.seed(666)
self._config = ParserConfig(config_file_path, extra_args)
self._parser = None
self._vocab = None
self.context = context if context else mxnet_prefer_gpu()

@property
def vocab(self) -> ParserVocabulary:
@@ -56,7 +57,7 @@ def train(self):
vocab.save(self._config.save_vocab_path)
vocab.log_info(logger)

with mx.Context(mxnet_prefer_gpu()):
with self.context:

self._parser = parser = BiaffineParser(vocab, config.word_dims, config.tag_dims,
config.dropout_emb,
@@ -123,7 +124,7 @@ def train(self):
def load(self):
config = self._config
self._vocab = vocab = ParserVocabulary.load(config.save_vocab_path)
with mx.Context(mxnet_prefer_gpu()):
with self.context:
self._parser = BiaffineParser(vocab, config.word_dims, config.tag_dims, config.dropout_emb,
config.lstm_layers,
config.lstm_hiddens, config.dropout_lstm_input, config.dropout_lstm_hidden,
@@ -136,7 +137,7 @@ def evaluate(self, logger=None):
parser = self._parser
vocab = self._vocab
config = self._config
with mx.Context(mxnet_prefer_gpu()):
with self.context:
UAS, LAS, speed = evaluate_official_script(parser, vocab, config.num_buckets_valid, config.test_batch_size,
config.test_file, os.path.join(config.save_dir, 'valid_tmp'))
if logger is None:
@@ -155,7 +156,7 @@ def parse(self, sentence: list):
for i, (word, tag) in enumerate(sentence):
words[i + 1, 0], tags[i + 1, 0] = vocab.word2id(word.lower()), vocab.tag2id(tag)

with mx.Context(mxnet_prefer_gpu()):
with self.context:
outputs = self._parser.run(words, tags, is_train=False)
words = []
for arc, rel, (word, tag) in zip(outputs[0][0], outputs[0][1], sentence):
@@ -173,4 +174,4 @@ def parse(self, sentence: list):
parser.evaluate()
sentence = [('Is', 'VBZ'), ('this', 'DT'), ('the', 'DT'), ('future', 'NN'), ('of', 'IN'), ('chamber', 'NN'),
('music', 'NN'), ('?', '.')]
print(parser.parse(sentence))
print(parser.parse(sentence))
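A minimal sketch of selecting a device for DepParser, following the changed constructor above; the config path is a placeholder, and leaving context unset falls back to mxnet_prefer_gpu():

import mxnet as mx
from elit.nlp.dep.parser.dep_parser import DepParser

# Build the parser on GPU 0, load the trained model described by the config file,
# then parse a POS-tagged sentence given as (word, tag) pairs.
parser = DepParser('data/model/dep/config.ini', context=mx.gpu(0))
parser.load()
print(parser.parse([('Is', 'VBZ'), ('this', 'DT'), ('the', 'DT'), ('future', 'NN'), ('?', '.')]))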
60 changes: 29 additions & 31 deletions elit/nlp/dep/parser/parser.py
@@ -27,7 +27,7 @@
from elit.component import NLPComponent
from elit.nlp.dep.common.utils import init_logger, Progbar
from elit.nlp.dep.parser import DEFAULT_CONFIG_FILE
from elit.nlp.dep.parser.biaffine_parser import BiaffineParser
from elit.nlp.dep.parser.biaffine_parser import BiaffineParser as _BiaffineParser
from elit.nlp.dep.parser.common.data import ParserVocabulary, DataLoader, np, ConllSentence, ConllWord
from elit.nlp.dep.parser.common.exponential_scheduler import ExponentialScheduler
from elit.nlp.dep.parser.evaluate import evaluate_official_script
@@ -41,11 +41,12 @@ class BiaffineParser(NLPComponent):
An implementation of "Deep Biaffine Attention for Neural Dependency Parsing" Dozat and Manning (2016)
"""

def __init__(self) -> None:
def __init__(self, context: mx.Context = None) -> None:
super().__init__()
self._config = None # type: ParserConfig
self._vocab = None # type: ParserVocabulary
self._parser = None # type: BiaffineParser
self.context = context if context else mxnet_prefer_gpu()

def train(self, trn_docs: Sequence[Document], dev_docs: Sequence[Document], model_path: str, **kwargs) -> float:
# read config file
@@ -61,15 +62,9 @@ def train(self, trn_docs: Sequence[Document], dev_docs: Sequence[Document], mode
logger = init_logger(config.save_dir)
vocab.log_info(logger)
# training
with mx.Context(mxnet_prefer_gpu()):
with self.context:

self._parser = parser = BiaffineParser(vocab, config.word_dims, config.tag_dims,
config.dropout_emb,
config.lstm_layers,
config.lstm_hiddens, config.dropout_lstm_input,
config.dropout_lstm_hidden,
config.mlp_arc_size,
config.mlp_rel_size, config.dropout_mlp, config.debug)
self._parser = parser = self._create_parser(config, vocab)
parser.initialize()
scheduler = ExponentialScheduler(config.learning_rate, config.decay, config.decay_steps)
optimizer = mx.optimizer.Adam(config.learning_rate, config.beta_1, config.beta_2, config.epsilon,
@@ -132,13 +127,14 @@ def decode(self, docs: Sequence[Document], **kwargs):
record = data_loader.idx_sequence
results = [None] * len(record)
idx = 0
for words, tags, arcs, rels in data_loader.get_batches(
batch_size=self._config.test_batch_size, shuffle=False):
outputs = self._parser.run(words, tags, is_train=False)
for output in outputs:
sent_idx = record[idx]
results[sent_idx] = output
idx += 1
with self.context:
for words, tags, arcs, rels in data_loader.get_batches(
batch_size=self._config.test_batch_size, shuffle=False):
outputs = self._parser.run(words, tags, is_train=False)
for output in outputs:
sent_idx = record[idx]
results[sent_idx] = output
idx += 1
idx = 0
for d in docs:
for s in d:
@@ -155,19 +151,20 @@ def evaluate(self, docs: Sequence[Document], **kwargs):
:return: (UAS, LAS, speed) speed is measured in sentences per second
"""
assert isinstance(docs, Sequence), 'Expect docs to be Sequence of Document'
UAS, LAS, speed = evaluate_official_script(self._parser, self._vocab, self._config.num_buckets_valid,
self._config.test_batch_size,
self._config.test_file,
None, documents=docs)
with self.context:
UAS, LAS, speed = evaluate_official_script(self._parser, self._vocab, self._config.num_buckets_valid,
self._config.test_batch_size,
self._config.test_file,
None, documents=docs)
return UAS, LAS, speed

def load(self, model_path: str, **kwargs):
if self._parser: # already loaded, ignore
return self
self._config = ParserConfig(os.path.join(model_path, 'config.ini'))
self._vocab = ParserVocabulary.load(self._config.save_vocab_path)
self._parser = self._create_parser(self._config, self._vocab)
pass
with self.context:
self._parser = self._create_parser(self._config, self._vocab)

def save(self, model_path: str, **kwargs):
self._config.save_dir = model_path
@@ -194,20 +191,21 @@ def parse(self, sentence: Sequence[Tuple]) -> ConllSentence:
for i, (word, tag) in enumerate(sentence):
words[i + 1, 0], tags[i + 1, 0] = vocab.word2id(word.lower()), vocab.tag2id(tag)

outputs = self._parser.run(words, tags, is_train=False)
with self.context:
outputs = self._parser.run(words, tags, is_train=False)
words = []
for arc, rel, (word, tag) in zip(outputs[0][0], outputs[0][1], sentence):
words.append(ConllWord(id=len(words) + 1, form=word, pos=tag, head=arc, relation=vocab.id2rel(rel)))
return ConllSentence(words)

def _create_parser(self, config, vocab):
return BiaffineParser(vocab, config.word_dims, config.tag_dims,
config.dropout_emb,
config.lstm_layers,
config.lstm_hiddens, config.dropout_lstm_input,
config.dropout_lstm_hidden,
config.mlp_arc_size,
config.mlp_rel_size, config.dropout_mlp, config.debug)
return _BiaffineParser(vocab, config.word_dims, config.tag_dims,
config.dropout_emb,
config.lstm_layers,
config.lstm_hiddens, config.dropout_lstm_input,
config.dropout_lstm_hidden,
config.mlp_arc_size,
config.mlp_rel_size, config.dropout_mlp, config.debug)


def _load_conll(path) -> Document:
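A minimal sketch of the component-level BiaffineParser with an explicit device, following the constructor, load, and parse methods above; the model directory is a placeholder and is expected to contain config.ini plus the saved vocabulary and weights:

import mxnet as mx
from elit.nlp.dep.parser.parser import BiaffineParser

# Parser creation and inference now run under the supplied context.
parser = BiaffineParser(context=mx.gpu(0))
parser.load('data/model/dep/en-jumbo')
conll = parser.parse([('Is', 'VBZ'), ('this', 'DT'), ('the', 'DT'), ('future', 'NN'), ('?', '.')])
print(conll)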
31 changes: 16 additions & 15 deletions elit/nlp/language_models/contextual_string_model.py
@@ -131,16 +131,17 @@ def freeze(self):
self.collect_params().setattr('grad_req', 'null')

@classmethod
def load_language_model(cls, model_file):
def load_language_model(cls, model_file, context: mx.Context = None):
config = LanguageModelConfig.load(os.path.join(model_file, 'config.pkl'))
model = ContextualStringModel(config.dictionary,
config.is_forward_lm,
config.hidden_size,
config.nlayers,
config.embedding_size,
config.nout,
config.dropout)
model.load_parameters(os.path.join(model_file, 'model.bin'), ctx=mx.Context(mxnet_prefer_gpu()))
if context is None:
    context = mxnet_prefer_gpu()
with context:
model = ContextualStringModel(config.dictionary,
config.is_forward_lm,
config.hidden_size,
config.nlayers,
config.embedding_size,
config.nout,
config.dropout)
model.load_parameters(os.path.join(model_file, 'model.bin'), ctx=context)
return model

@staticmethod
@@ -222,7 +223,7 @@ def train(self,
self.model.initialize()
best_val_loss = 100000000
scheduler = ReduceLROnPlateau(lr=learning_rate, verbose=True, factor=anneal_factor,
patience=patience)
patience=patience)
optimizer = mx.optimizer.SGD(learning_rate=learning_rate, lr_scheduler=scheduler)
trainer = gluon.Trainer(self.model.collect_params(),
optimizer=optimizer)
@@ -400,10 +401,10 @@ def _convert_dumped_model():
def _train():
corpus = TextCorpus('data/raw')
language_model = ContextualStringModel(corpus.dictionary,
is_forward_lm=False,
hidden_size=1024,
nlayers=1,
dropout=0.25)
is_forward_lm=False,
hidden_size=1024,
nlayers=1,
dropout=0.25)
trainer = ContextualStringModelTrainer(language_model, corpus)
trainer.train('data/model/lm-jumbo-backward1024',
sequence_length=250,
@@ -419,4 +420,4 @@ def _load():
if __name__ == '__main__':
_train()
# _convert_dumped_model()
# _load()
# _load()
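A minimal sketch of load_language_model with an explicit device, matching the updated classmethod above; the model directory is a placeholder and must contain config.pkl and model.bin:

import mxnet as mx
from elit.nlp.language_models.contextual_string_model import ContextualStringModel

# Parameters are created and loaded under the given context rather than the default device.
lm = ContextualStringModel.load_language_model('data/model/lm-news-backward', context=mx.gpu(1))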
20 changes: 11 additions & 9 deletions elit/nlp/tagger/embeddings.py
@@ -20,13 +20,14 @@
import re
from abc import abstractmethod
from typing import Union, List

import mxnet as mx
import mxnet.ndarray as nd
import numpy as np
from mxnet.gluon import nn

from elit.nlp.language_models.contextual_string_model import ContextualStringModel
from elit.nlp.tagger.corpus import Sentence, Token, read_pretrained_embeddings
from elit.nlp.tagger.mxnet_util import mxnet_prefer_gpu


class Embeddings(nn.Block):
@@ -138,7 +139,7 @@ class CharLMEmbeddings(TokenEmbeddings):
def forward(self, *args):
pass

def __init__(self, model, detach: bool = True):
def __init__(self, model, detach: bool = True, context: mx.Context = None):
super().__init__()

"""
@@ -154,18 +155,19 @@ def __init__(self, model, detach: bool = True):
training and often leads to worse results, so not recommended.
"""
self.static_embeddings = detach

self.lm = ContextualStringModel.load_language_model(model)
self.context = context if context else mxnet_prefer_gpu()
self.lm = ContextualStringModel.load_language_model(model, context=self.context)
self.detach = detach
if detach:
self.lm.freeze()

self.is_forward_lm = self.lm.is_forward_lm

dummy_sentence = Sentence()
dummy_sentence.add_token(Token('hello'))
embedded_dummy = self.embed(dummy_sentence)
self.__embedding_length = len(embedded_dummy[0].get_token(1).get_embedding())
with self.context:
dummy_sentence = Sentence()
dummy_sentence.add_token(Token('hello'))
embedded_dummy = self.embed(dummy_sentence)
self.__embedding_length = len(embedded_dummy[0].get_token(1).get_embedding())

@property
def embedding_length(self) -> int:
@@ -274,4 +276,4 @@ def _add_embeddings_internal(self, sentences: List[Sentence]) -> List[Sentence]:
for embedding in self.embeddings:
embedding._add_embeddings_internal(sentences)

return sentences
return sentences
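A minimal sketch of CharLMEmbeddings with an explicit device, following the changed constructor above; the language model path is a placeholder, and omitting context falls back to mxnet_prefer_gpu():

import mxnet as mx
from elit.nlp.tagger.embeddings import CharLMEmbeddings

# The underlying ContextualStringModel is loaded, and the dummy-sentence probe run, on the same device.
char_lm = CharLMEmbeddings('data/model/lm-news-forward', context=mx.gpu(0))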
30 changes: 15 additions & 15 deletions elit/nlp/tagger/ner_tagger.py
@@ -49,7 +49,8 @@ def train(self, trn_docs: Sequence[Document], dev_docs: Sequence[Document], mode

def decode(self, docs: Sequence[Document], **kwargs):
samples = NLPTaskDataFetcher.convert_elit_documents(docs)
sentences = self.tagger.predict(samples)
with self.context:
sentences = self.tagger.predict(samples)
idx = 0
for d in docs:
for s in d:
@@ -59,7 +60,7 @@ def decode(self, docs: Sequence[Document], **kwargs):

def evaluate(self, docs: Sequence[Document], **kwargs):
print('test... ')
with mx.Context(mxnet_prefer_gpu()):
with self.context:
trainer = SequenceTaggerTrainer(self.tagger, corpus=None, test_mode=True)
test_score, test_fp, test_result = trainer.evaluate(NLPTaskDataFetcher.convert_elit_documents(docs),
tempfile.gettempdir(),
@@ -70,16 +71,15 @@


if __name__ == '__main__':
with mx.Context(mxnet_prefer_gpu()):
tagger = NERTagger()
model_path = 'data/model/ner/jumbo'
# tagger.train(conll_to_documents('data/conll-03/debug/eng.trn'), conll_to_documents('data/conll-03/debug/eng.dev'),
# model_path, pretrained_embeddings='data/embedding/glove/glove.6B.100d.debug.txt',
# forward_language_model='data/model/lm-news-forward',
# backward_language_model='data/model/lm-news-backward',
# max_epochs=1)
tagger.load(model_path)
test = conll_to_documents('data/dat/en-ner.tst', headers={0: 'text', 1: 'pos', 2: 'ner'})
sent = tagger.decode(test)[0][SENS][3]
print(sent[NER])
print(tagger.evaluate(test))
tagger = NERTagger(mx.gpu(3))
model_path = 'data/model/ner/jumbo'
# tagger.train(conll_to_documents('data/conll-03/debug/eng.trn'), conll_to_documents('data/conll-03/debug/eng.dev'),
# model_path, pretrained_embeddings='data/embedding/glove/glove.6B.100d.debug.txt',
# forward_language_model='data/model/lm-news-forward',
# backward_language_model='data/model/lm-news-backward',
# max_epochs=1)
tagger.load(model_path)
test = conll_to_documents('data/dat/en-ner.tst', headers={0: 'text', 1: 'pos', 2: 'ner'})
sent = tagger.decode(test)[0][SENS][3]
print(sent[NER])
print(tagger.evaluate(test))