diff --git a/elit/embedding.py b/elit/embedding.py
index c613da6..f4150f5 100644
--- a/elit/embedding.py
+++ b/elit/embedding.py
@@ -20,9 +20,11 @@
 import abc
 
 import fastText
 import numpy as np
+import mxnet as mx
 from gensim.models import KeyedVectors
 
 from elit.nlp.language_models.contextual_string_model import ContextualStringModel
+from elit.nlp.tagger.mxnet_util import mxnet_prefer_gpu
 from elit.structure import Document
 
 __author__ = 'Jinho D. Choi'
@@ -160,13 +162,14 @@ class ContextualStringEmbedding(Embedding):
     :class:`ContextualStringEmbedding` is the context-based model proposed by `Akbik et al., 2018`_.
     """
 
-    def __init__(self, model_path: str, detach: bool = True):
+    def __init__(self, model_path: str, detach: bool = True, context: mx.Context = None):
         """
         :param model_path: the path to the model file.
         :param detach: if `False`, the gradient will propagate into the language model, which dramatically slows down
                        training and often leads to worse results.
         """
-        self.lm = ContextualStringModel.load_language_model(model_path)
+        self.context = context if context else mxnet_prefer_gpu()
+        self.lm = ContextualStringModel.load_language_model(model_path, self.context)
         super().__init__(self.lm.embedding_size)
         self.detach = detach
diff --git a/elit/nlp/dep/parser/dep_parser.py b/elit/nlp/dep/parser/dep_parser.py
index 5446691..620679c 100644
--- a/elit/nlp/dep/parser/dep_parser.py
+++ b/elit/nlp/dep/parser/dep_parser.py
@@ -35,12 +35,13 @@
 class DepParser(object):
-    def __init__(self, config_file_path, extra_args=None) -> None:
+    def __init__(self, config_file_path, context: mx.Context = None, extra_args=None) -> None:
         super().__init__()
         np.random.seed(666)
         self._config = ParserConfig(config_file_path, extra_args)
         self._parser = None
         self._vocab = None
+        self.context = context if context else mxnet_prefer_gpu()
 
     @property
     def vocab(self) -> ParserVocabulary:
@@ -56,7 +57,7 @@ def train(self):
         vocab.save(self._config.save_vocab_path)
         vocab.log_info(logger)
-        with mx.Context(mxnet_prefer_gpu()):
+        with self.context:
             self._parser = parser = BiaffineParser(vocab, config.word_dims, config.tag_dims,
                                                    config.dropout_emb,
@@ -123,7 +124,7 @@ def train(self):
     def load(self):
         config = self._config
         self._vocab = vocab = ParserVocabulary.load(config.save_vocab_path)
-        with mx.Context(mxnet_prefer_gpu()):
+        with self.context:
             self._parser = BiaffineParser(vocab, config.word_dims, config.tag_dims, config.dropout_emb,
                                           config.lstm_layers, config.lstm_hiddens, config.dropout_lstm_input,
                                           config.dropout_lstm_hidden,
@@ -136,7 +137,7 @@ def evaluate(self, logger=None):
         parser = self._parser
         vocab = self._vocab
         config = self._config
-        with mx.Context(mxnet_prefer_gpu()):
+        with self.context:
             UAS, LAS, speed = evaluate_official_script(parser, vocab, config.num_buckets_valid, config.test_batch_size,
                                                        config.test_file, os.path.join(config.save_dir, 'valid_tmp'))
         if logger is None:
@@ -155,7 +156,7 @@ def parse(self, sentence: list):
         for i, (word, tag) in enumerate(sentence):
             words[i + 1, 0], tags[i + 1, 0] = vocab.word2id(word.lower()), vocab.tag2id(tag)
 
-        with mx.Context(mxnet_prefer_gpu()):
+        with self.context:
             outputs = self._parser.run(words, tags, is_train=False)
         words = []
         for arc, rel, (word, tag) in zip(outputs[0][0], outputs[0][1], sentence):
@@ -173,4 +174,4 @@ def parse(self, sentence: list):
     parser.evaluate()
     sentence = [('Is', 'VBZ'), ('this', 'DT'), ('the', 'DT'), ('future', 'NN'), ('of', 'IN'), ('chamber', 'NN'),
                 ('music', 'NN'), ('?', '.')]
-    print(parser.parse(sentence))
\ No newline at end of file
+    print(parser.parse(sentence))
diff --git a/elit/nlp/dep/parser/parser.py b/elit/nlp/dep/parser/parser.py
index ac57ae9..3673183 100644
--- a/elit/nlp/dep/parser/parser.py
+++ b/elit/nlp/dep/parser/parser.py
@@ -27,7 +27,7 @@
 from elit.component import NLPComponent
 from elit.nlp.dep.common.utils import init_logger, Progbar
 from elit.nlp.dep.parser import DEFAULT_CONFIG_FILE
-from elit.nlp.dep.parser.biaffine_parser import BiaffineParser
+from elit.nlp.dep.parser.biaffine_parser import BiaffineParser as _BiaffineParser
 from elit.nlp.dep.parser.common.data import ParserVocabulary, DataLoader, np, ConllSentence, ConllWord
 from elit.nlp.dep.parser.common.exponential_scheduler import ExponentialScheduler
 from elit.nlp.dep.parser.evaluate import evaluate_official_script
@@ -41,11 +41,12 @@ class BiaffineParser(NLPComponent):
     An implementation of "Deep Biaffine Attention for Neural Dependency Parsing" Dozat and Manning (2016)
     """
 
-    def __init__(self) -> None:
+    def __init__(self, context: mx.Context = None) -> None:
         super().__init__()
         self._config = None  # type: ParserConfig
         self._vocab = None  # type: ParserVocabulary
         self._parser = None  # type: BiaffineParser
+        self.context = context if context else mxnet_prefer_gpu()
 
     def train(self, trn_docs: Sequence[Document], dev_docs: Sequence[Document], model_path: str, **kwargs) -> float:
         # read config file
@@ -61,15 +62,9 @@ def train(self, trn_docs: Sequence[Document], dev_docs: Sequence[Document], mode
         logger = init_logger(config.save_dir)
         vocab.log_info(logger)
 
         # training
-        with mx.Context(mxnet_prefer_gpu()):
+        with self.context:
-            self._parser = parser = BiaffineParser(vocab, config.word_dims, config.tag_dims,
-                                                   config.dropout_emb,
-                                                   config.lstm_layers,
-                                                   config.lstm_hiddens, config.dropout_lstm_input,
-                                                   config.dropout_lstm_hidden,
-                                                   config.mlp_arc_size,
-                                                   config.mlp_rel_size, config.dropout_mlp, config.debug)
+            self._parser = parser = self._create_parser(config, vocab)
             parser.initialize()
             scheduler = ExponentialScheduler(config.learning_rate, config.decay, config.decay_steps)
             optimizer = mx.optimizer.Adam(config.learning_rate, config.beta_1, config.beta_2, config.epsilon,
@@ -132,13 +127,14 @@ def decode(self, docs: Sequence[Document], **kwargs):
         record = data_loader.idx_sequence
         results = [None] * len(record)
         idx = 0
-        for words, tags, arcs, rels in data_loader.get_batches(
-                batch_size=self._config.test_batch_size, shuffle=False):
-            outputs = self._parser.run(words, tags, is_train=False)
-            for output in outputs:
-                sent_idx = record[idx]
-                results[sent_idx] = output
-                idx += 1
+        with self.context:
+            for words, tags, arcs, rels in data_loader.get_batches(
+                    batch_size=self._config.test_batch_size, shuffle=False):
+                outputs = self._parser.run(words, tags, is_train=False)
+                for output in outputs:
+                    sent_idx = record[idx]
+                    results[sent_idx] = output
+                    idx += 1
         idx = 0
         for d in docs:
             for s in d:
@@ -155,10 +151,11 @@ def evaluate(self, docs: Sequence[Document], **kwargs):
         :return: (UAS, LAS, speed) speed is measured in sentences per second
         """
         assert isinstance(docs, Sequence), 'Expect docs to be Sequence of Document'
-        UAS, LAS, speed = evaluate_official_script(self._parser, self._vocab, self._config.num_buckets_valid,
-                                                   self._config.test_batch_size,
-                                                   self._config.test_file,
-                                                   None, documents=docs)
+        with self.context:
+            UAS, LAS, speed = evaluate_official_script(self._parser, self._vocab, self._config.num_buckets_valid,
+                                                       self._config.test_batch_size,
+                                                       self._config.test_file,
+                                                       None, documents=docs)
         return UAS, LAS, speed
 
     def load(self, model_path: str, **kwargs):
@@ -166,8 +163,8 @@ def load(self, model_path: str, **kwargs):
             return self
         self._config = ParserConfig(os.path.join(model_path, 'config.ini'))
         self._vocab = ParserVocabulary.load(self._config.save_vocab_path)
-        self._parser = self._create_parser(self._config, self._vocab)
-        pass
+        with self.context:
+            self._parser = self._create_parser(self._config, self._vocab)
 
     def save(self, model_path: str, **kwargs):
         self._config.save_dir = model_path
@@ -194,20 +191,21 @@ def parse(self, sentence: Sequence[Tuple]) -> ConllSentence:
         for i, (word, tag) in enumerate(sentence):
             words[i + 1, 0], tags[i + 1, 0] = vocab.word2id(word.lower()), vocab.tag2id(tag)
 
-        outputs = self._parser.run(words, tags, is_train=False)
+        with self.context:
+            outputs = self._parser.run(words, tags, is_train=False)
         words = []
         for arc, rel, (word, tag) in zip(outputs[0][0], outputs[0][1], sentence):
             words.append(ConllWord(id=len(words) + 1, form=word, pos=tag, head=arc, relation=vocab.id2rel(rel)))
         return ConllSentence(words)
 
     def _create_parser(self, config, vocab):
-        return BiaffineParser(vocab, config.word_dims, config.tag_dims,
-                              config.dropout_emb,
-                              config.lstm_layers,
-                              config.lstm_hiddens, config.dropout_lstm_input,
-                              config.dropout_lstm_hidden,
-                              config.mlp_arc_size,
-                              config.mlp_rel_size, config.dropout_mlp, config.debug)
+        return _BiaffineParser(vocab, config.word_dims, config.tag_dims,
+                               config.dropout_emb,
+                               config.lstm_layers,
+                               config.lstm_hiddens, config.dropout_lstm_input,
+                               config.dropout_lstm_hidden,
+                               config.mlp_arc_size,
+                               config.mlp_rel_size, config.dropout_mlp, config.debug)
 
 
 def _load_conll(path) -> Document:
diff --git a/elit/nlp/language_models/contextual_string_model.py b/elit/nlp/language_models/contextual_string_model.py
index 942dbfe..e9bb8ee 100644
--- a/elit/nlp/language_models/contextual_string_model.py
+++ b/elit/nlp/language_models/contextual_string_model.py
@@ -131,16 +131,18 @@ def freeze(self):
         self.collect_params().setattr('grad_req', 'null')
 
     @classmethod
-    def load_language_model(cls, model_file):
+    def load_language_model(cls, model_file, context: mx.Context = None):
+        context = context if context else mxnet_prefer_gpu()
         config = LanguageModelConfig.load(os.path.join(model_file, 'config.pkl'))
-        model = ContextualStringModel(config.dictionary,
-                                      config.is_forward_lm,
-                                      config.hidden_size,
-                                      config.nlayers,
-                                      config.embedding_size,
-                                      config.nout,
-                                      config.dropout)
-        model.load_parameters(os.path.join(model_file, 'model.bin'), ctx=mx.Context(mxnet_prefer_gpu()))
+        with context:
+            model = ContextualStringModel(config.dictionary,
+                                          config.is_forward_lm,
+                                          config.hidden_size,
+                                          config.nlayers,
+                                          config.embedding_size,
+                                          config.nout,
+                                          config.dropout)
+            model.load_parameters(os.path.join(model_file, 'model.bin'), ctx=context)
         return model
 
     @staticmethod
@@ -222,7 +223,7 @@ def train(self,
         self.model.initialize()
         best_val_loss = 100000000
         scheduler = ReduceLROnPlateau(lr=learning_rate, verbose=True, factor=anneal_factor,
-                                          patience=patience)
+                                      patience=patience)
         optimizer = mx.optimizer.SGD(learning_rate=learning_rate, lr_scheduler=scheduler)
         trainer = gluon.Trainer(self.model.collect_params(), optimizer=optimizer)
 
@@ -400,10 +401,10 @@ def _convert_dumped_model():
 def _train():
     corpus = TextCorpus('data/raw')
     language_model = ContextualStringModel(corpus.dictionary,
-                                               is_forward_lm=False,
-                                               hidden_size=1024,
-                                               nlayers=1,
-                                               dropout=0.25)
+                                           is_forward_lm=False,
+                                           hidden_size=1024,
+                                           nlayers=1,
+                                           dropout=0.25)
     trainer = ContextualStringModelTrainer(language_model, corpus)
     trainer.train('data/model/lm-jumbo-backward1024',
                   sequence_length=250,
@@ -419,4 +420,4 @@ def _load():
 if __name__ == '__main__':
     _train()
     # _convert_dumped_model()
-    # _load()
\ No newline at end of file
+    # _load()
diff --git a/elit/nlp/tagger/embeddings.py b/elit/nlp/tagger/embeddings.py
index 8130ff0..a481c6c 100644
--- a/elit/nlp/tagger/embeddings.py
+++ b/elit/nlp/tagger/embeddings.py
@@ -20,13 +20,14 @@
 import re
 from abc import abstractmethod
 from typing import Union, List
-
+import mxnet as mx
 import mxnet.ndarray as nd
 import numpy as np
 from mxnet.gluon import nn
 
 from elit.nlp.language_models.contextual_string_model import ContextualStringModel
 from elit.nlp.tagger.corpus import Sentence, Token, read_pretrained_embeddings
+from elit.nlp.tagger.mxnet_util import mxnet_prefer_gpu
 
 
 class Embeddings(nn.Block):
@@ -138,7 +139,7 @@ class CharLMEmbeddings(TokenEmbeddings):
     def forward(self, *args):
         pass
 
-    def __init__(self, model, detach: bool = True):
+    def __init__(self, model, detach: bool = True, context: mx.Context = None):
         super().__init__()
 
         """
@@ -154,18 +155,19 @@ def __init__(self, model, detach: bool = True):
                  training and often leads to worse results, so not recommended.
         """
         self.static_embeddings = detach
-
-        self.lm = ContextualStringModel.load_language_model(model)
+        self.context = context if context else mxnet_prefer_gpu()
+        self.lm = ContextualStringModel.load_language_model(model, context=self.context)
         self.detach = detach
 
         if detach:
             self.lm.freeze()
         self.is_forward_lm = self.lm.is_forward_lm
 
-        dummy_sentence = Sentence()
-        dummy_sentence.add_token(Token('hello'))
-        embedded_dummy = self.embed(dummy_sentence)
-        self.__embedding_length = len(embedded_dummy[0].get_token(1).get_embedding())
+        with self.context:
+            dummy_sentence = Sentence()
+            dummy_sentence.add_token(Token('hello'))
+            embedded_dummy = self.embed(dummy_sentence)
+            self.__embedding_length = len(embedded_dummy[0].get_token(1).get_embedding())
 
     @property
     def embedding_length(self) -> int:
@@ -274,4 +276,4 @@ def _add_embeddings_internal(self, sentences: List[Sentence]) -> List[Sentence]:
         for embedding in self.embeddings:
             embedding._add_embeddings_internal(sentences)
 
-        return sentences
\ No newline at end of file
+        return sentences
diff --git a/elit/nlp/tagger/ner_tagger.py b/elit/nlp/tagger/ner_tagger.py
index 949edfd..73eb982 100644
--- a/elit/nlp/tagger/ner_tagger.py
+++ b/elit/nlp/tagger/ner_tagger.py
@@ -49,7 +49,8 @@ def train(self, trn_docs: Sequence[Document], dev_docs: Sequence[Document], mode
 
     def decode(self, docs: Sequence[Document], **kwargs):
         samples = NLPTaskDataFetcher.convert_elit_documents(docs)
-        sentences = self.tagger.predict(samples)
+        with self.context:
+            sentences = self.tagger.predict(samples)
         idx = 0
         for d in docs:
             for s in d:
@@ -59,7 +60,7 @@ def decode(self, docs: Sequence[Document], **kwargs):
 
     def evaluate(self, docs: Sequence[Document], **kwargs):
         print('test...')
-        with mx.Context(mxnet_prefer_gpu()):
+        with self.context:
             trainer = SequenceTaggerTrainer(self.tagger, corpus=None, test_mode=True)
             test_score, test_fp, test_result = trainer.evaluate(NLPTaskDataFetcher.convert_elit_documents(docs),
                                                                 tempfile.gettempdir(),
@@ -70,16 +71,15 @@ def evaluate(self, docs: Sequence[Document], **kwargs):
 
 
 if __name__ == '__main__':
-    with mx.Context(mxnet_prefer_gpu()):
-        tagger = NERTagger()
-        model_path = 'data/model/ner/jumbo'
-        # tagger.train(conll_to_documents('data/conll-03/debug/eng.trn'), conll_to_documents('data/conll-03/debug/eng.dev'),
-        #              model_path, pretrained_embeddings='data/embedding/glove/glove.6B.100d.debug.txt',
-        #              forward_language_model='data/model/lm-news-forward',
-        #              backward_language_model='data/model/lm-news-backward',
-        #              max_epochs=1)
-        tagger.load(model_path)
-        test = conll_to_documents('data/dat/en-ner.tst', headers={0: 'text', 1: 'pos', 2: 'ner'})
-        sent = tagger.decode(test)[0][SENS][3]
-        print(sent[NER])
-        print(tagger.evaluate(test))
+    tagger = NERTagger(mx.gpu(3))
+    model_path = 'data/model/ner/jumbo'
+    # tagger.train(conll_to_documents('data/conll-03/debug/eng.trn'), conll_to_documents('data/conll-03/debug/eng.dev'),
+    #              model_path, pretrained_embeddings='data/embedding/glove/glove.6B.100d.debug.txt',
+    #              forward_language_model='data/model/lm-news-forward',
+    #              backward_language_model='data/model/lm-news-backward',
+    #              max_epochs=1)
+    tagger.load(model_path)
+    test = conll_to_documents('data/dat/en-ner.tst', headers={0: 'text', 1: 'pos', 2: 'ner'})
+    sent = tagger.decode(test)[0][SENS][3]
+    print(sent[NER])
+    print(tagger.evaluate(test))
diff --git a/elit/nlp/tagger/pos_tagger.py b/elit/nlp/tagger/pos_tagger.py
index 90c68ea..eb4ac19 100644
--- a/elit/nlp/tagger/pos_tagger.py
+++ b/elit/nlp/tagger/pos_tagger.py
@@ -34,7 +34,8 @@ def train(self, trn_docs: Sequence[Document], dev_docs: Sequence[Document], mode
 
     def decode(self, docs: Sequence[Document], **kwargs):
         samples = NLPTaskDataFetcher.convert_elit_documents(docs)
-        sentences = self.tagger.predict(samples)
+        with self.context:
+            sentences = self.tagger.predict(samples)
         idx = 0
         for d in docs:
             for s in d:
@@ -44,28 +45,28 @@ def decode(self, docs: Sequence[Document], **kwargs):
 
     def evaluate(self, docs: Sequence[Document], **kwargs):
         print('test...')
-        trainer = SequenceTaggerTrainer(self.tagger, corpus=None, test_mode=True)
-        test_score, _, _ = trainer.evaluate(NLPTaskDataFetcher.convert_elit_documents(docs),
-                                            tempfile.gettempdir(),
-                                            evaluation_method='accuracy',
-                                            embeddings_in_memory=False)
+        with self.context:
+            trainer = SequenceTaggerTrainer(self.tagger, corpus=None, test_mode=True)
+            test_score, _, _ = trainer.evaluate(NLPTaskDataFetcher.convert_elit_documents(docs),
+                                                tempfile.gettempdir(),
+                                                evaluation_method='accuracy',
+                                                embeddings_in_memory=False)
         print('TEST \t%d\t' % test_score)
         return test_score
 
 
 if __name__ == '__main__':
-    tagger = POSTagger()
+    tagger = POSTagger(context=mx.gpu(3))
     model_path = 'data/model/pos/wsj'
-    with mx.Context(mxnet_prefer_gpu()):
-        tagger.load(model_path)
-        # tagger.train(conll_to_documents('data/dat/en-pos.dev', headers={0: 'text', 1: 'pos'}),
-        #              conll_to_documents('data/dat/en-pos.dev', headers={0: 'text', 1: 'pos'}),
-        #              model_path, pretrained_embeddings='data/embedding/glove/glove.6B.100d.debug.txt',
-        #              forward_language_model='data/model/lm-news-forward',
-        #              backward_language_model='data/model/lm-news-backward',
-        #              max_epochs=1,
-        #              embeddings_in_memory=False)
-        test = conll_to_documents('data/dat/en-pos.tst', headers={0: 'text', 1: 'pos'})
-        # sent = tagger.decode(test)[0][SENS][3]
-        # print(sent[POS])
-        print(tagger.evaluate(test))
\ No newline at end of file
+    tagger.load(model_path)
+    # tagger.train(conll_to_documents('data/dat/en-pos.dev', headers={0: 'text', 1: 'pos'}),
+    #              conll_to_documents('data/dat/en-pos.dev', headers={0: 'text', 1: 'pos'}),
+    #              model_path, pretrained_embeddings='data/embedding/glove/glove.6B.100d.debug.txt',
+    #              forward_language_model='data/model/lm-news-forward',
+    #              backward_language_model='data/model/lm-news-backward',
+    #              max_epochs=1,
+    #              embeddings_in_memory=False)
+    test = conll_to_documents('data/dat/en-pos.tst', headers={0: 'text', 1: 'pos'})
+    # sent = tagger.decode(test)[0][SENS][3]
+    # print(sent[POS])
+    print(tagger.evaluate(test))
diff --git a/elit/nlp/tagger/sequence_tagger_model.py b/elit/nlp/tagger/sequence_tagger_model.py
index 714ebb8..c36ef6d 100644
--- a/elit/nlp/tagger/sequence_tagger_model.py
+++ b/elit/nlp/tagger/sequence_tagger_model.py
@@ -166,32 +166,38 @@ def save(self, model_folder: str):
         self.save_parameters(model_path)
 
     @classmethod
-    def load_from_file(cls, model_folder, **kwargs):
+    def load_from_file(cls, model_folder, context: mx.Context = None, **kwargs):
+        if context is None:
+            context = mxnet_prefer_gpu()
         config_path = os.path.join(model_folder, 'config.pkl')
         with open(config_path, 'rb') as f:
             config = pickle.load(f)
-        embedding_types = [
-
-            WordEmbeddings('{}data/embedding/fasttext100.vec.txt'.format(kwargs.get('word_embedding_path', ''))),
-
-            # comment in this line to use character embeddings
-            # CharacterEmbeddings(),
-
-            # comment in these lines to use contextual string embeddings
-            CharLMEmbeddings('{}data/model/lm-news-forward'.format(kwargs.get('word_embedding_path', ''))),
-            CharLMEmbeddings('{}data/model/lm-news-backward'.format(kwargs.get('word_embedding_path', ''))),
-        ]
-
-        embeddings = StackedEmbeddings(embeddings=embedding_types)
-        model = SequenceTagger(
-            hidden_size=config['hidden_size'],
-            embeddings=embeddings,
-            tag_dictionary=config['tag_dictionary'],
-            tag_type=config['tag_type'],
-            use_crf=config['use_crf'],
-            use_rnn=config['use_rnn'],
-            rnn_layers=config['rnn_layers'])
-        model.load_parameters(os.path.join(model_folder, 'model.bin'), ctx=mx.Context(mxnet_prefer_gpu()))
+        with context:
+            embedding_types = [
+
+                WordEmbeddings(
+                    '{}data/embedding/fasttext100.vec.txt'.format(kwargs.get('word_embedding_path', ''))),
+
+                # comment in this line to use character embeddings
+                # CharacterEmbeddings(),
+
+                # comment in these lines to use contextual string embeddings
+                CharLMEmbeddings('{}data/model/lm-news-forward'.format(kwargs.get('word_embedding_path', '')),
+                                 context=context),
+                CharLMEmbeddings('{}data/model/lm-news-backward'.format(kwargs.get('word_embedding_path', '')),
+                                 context=context),
+            ]
+
+            embeddings = StackedEmbeddings(embeddings=embedding_types)
+            model = SequenceTagger(
+                hidden_size=config['hidden_size'],
+                embeddings=embeddings,
+                tag_dictionary=config['tag_dictionary'],
+                tag_type=config['tag_type'],
+                use_crf=config['use_crf'],
+                use_rnn=config['use_rnn'],
+                rnn_layers=config['rnn_layers'])
+            model.load_parameters(os.path.join(model_folder, 'model.bin'), ctx=context)
         return model
 
     def forward(self, sentences: List[Sentence]) -> Tuple[nd.NDArray, nd.NDArray, List]:
@@ -493,4 +499,4 @@ def forward(self, x: nd.NDArray):
     sent.add_token(Token('European', pos='NNP'))
     sent.add_token(Token('Union', pos='NNP'))
     result = tagger.predict(sent)[0]
-    print([t.text + '/' + t.tags['ner'] for t in result])
\ No newline at end of file
+    print([t.text + '/' + t.tags['ner'] for t in result])
diff --git a/elit/nlp/tagger/sequence_tagger_trainer.py b/elit/nlp/tagger/sequence_tagger_trainer.py
index 92ed097..3eb6fd7 100644
--- a/elit/nlp/tagger/sequence_tagger_trainer.py
+++ b/elit/nlp/tagger/sequence_tagger_trainer.py
@@ -99,7 +99,8 @@ def train(self,
              patience: int = 2,
              save_model: bool = True,
              embeddings_in_memory: bool = True,
-             train_with_dev: bool = False) -> float:
+             train_with_dev: bool = False,
+             context: mx.Context = None) -> float:
         """
 
         :param base_path: a folder to store model, log etc.
@@ -134,7 +135,7 @@ def train(self,
 
         # At any point you can hit Ctrl + C to break out of training early.
         try:
-            with mx.Context(mxnet_prefer_gpu()):
+            with mx.Context(context if context else mxnet_prefer_gpu()):
                 self.model.initialize()
                 scheduler = ReduceLROnPlateau(lr=learning_rate, verbose=True, factor=anneal_factor,
                                               patience=patience, mode=anneal_mode)
diff --git a/elit/nlp/tagger/tagger.py b/elit/nlp/tagger/tagger.py
index a261c1e..2518524 100644
--- a/elit/nlp/tagger/tagger.py
+++ b/elit/nlp/tagger/tagger.py
@@ -16,39 +16,40 @@
 class Tagger(NLPComponent):
-    def __init__(self) -> None:
+    def __init__(self, context: mx.Context = None) -> None:
         super().__init__()
         self.tagger = None
+        self.context = context if context else mxnet_prefer_gpu()
 
     def init(self, **kwargs):
         pass
 
     def load(self, model_path: str, **kwargs):
-        self.tagger = SequenceTagger.load_from_file(model_path, **kwargs)
+        self.tagger = SequenceTagger.load_from_file(model_path, context=self.context, **kwargs)
 
     def save(self, model_path: str, **kwargs):
         self.tagger.save(model_path)
 
     def _train(self, trn_docs: Sequence[Document], dev_docs: Sequence[Document], model_path: str,
-                pretrained_embeddings,
-                forward_language_model,
-                backward_language_model,
-                tag_type='ner',
-                learning_rate: float = 0.1,
-                mini_batch_size: int = 32,
-                max_epochs: int = 100,
-                anneal_factor: float = 0.5,
-                patience: int = 2,
-                save_model: bool = True,
-                embeddings_in_memory: bool = True,
-                train_with_dev: bool = False,
-                **kwargs) -> float:
+               pretrained_embeddings,
+               forward_language_model,
+               backward_language_model,
+               tag_type='ner',
+               learning_rate: float = 0.1,
+               mini_batch_size: int = 32,
+               max_epochs: int = 100,
+               anneal_factor: float = 0.5,
+               patience: int = 2,
+               save_model: bool = True,
+               embeddings_in_memory: bool = True,
+               train_with_dev: bool = False,
+               **kwargs) -> float:
         corpus = TaggedCorpus(NLPTaskDataFetcher.convert_elit_documents(trn_docs),
                               NLPTaskDataFetcher.convert_elit_documents(dev_docs), [])
 
         tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
-        with mx.Context(mxnet_prefer_gpu()):
+        with mx.Context(self.context):
             embedding_types = [
 
                 WordEmbeddings(pretrained_embeddings),
@@ -57,17 +58,17 @@ def _train(self, trn_docs: Sequence[Document], dev_docs: Sequence[Document], mod
                 # CharacterEmbeddings(),
 
                 # comment in these lines to use contextual string embeddings
-                CharLMEmbeddings(forward_language_model),
-                CharLMEmbeddings(backward_language_model),
+                CharLMEmbeddings(forward_language_model, context=self.context),
+                CharLMEmbeddings(backward_language_model, context=self.context),
             ]
 
             embeddings = StackedEmbeddings(embeddings=embedding_types)
 
             self.tagger = SequenceTagger(hidden_size=256,
-                                             embeddings=embeddings,
-                                             tag_dictionary=tag_dictionary,
-                                             tag_type=tag_type,
-                                             use_crf=True)
+                                         embeddings=embeddings,
+                                         tag_dictionary=tag_dictionary,
+                                         tag_type=tag_type,
+                                         use_crf=True)
 
             trainer = SequenceTaggerTrainer(self.tagger, corpus, test_mode=False)
@@ -78,4 +79,4 @@ def _train(self, trn_docs: Sequence[Document], dev_docs: Sequence[Document], mod
                              patience,
                              save_model,
                              embeddings_in_memory,
-                             train_with_dev)
\ No newline at end of file
+                             train_with_dev)
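
Usage note (not part of the patch): the changes above thread an explicit mx.Context through the taggers and parsers instead of hard-coding mxnet_prefer_gpu() at every call site. A minimal sketch of the resulting API, assuming a trained POS model exists at the illustrative path below and that `docs` is a sequence of elit Documents prepared elsewhere:

    import mxnet as mx

    from elit.nlp.tagger.pos_tagger import POSTagger

    # Pin the component to one device; omitting the argument falls back to mxnet_prefer_gpu().
    tagger = POSTagger(context=mx.gpu(0))
    tagger.load('data/model/pos/wsj')  # illustrative path, taken from the __main__ block above
    tagger.decode(docs)                # decode/evaluate now run under the chosen context

The same pattern applies to NERTagger and DepParser, whose constructors gained the same optional context parameter.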