From 92a26c25f487a4aef7efb4ea6cf312d632e945f1 Mon Sep 17 00:00:00 2001
From: hankcs
Date: Wed, 28 Nov 2018 01:34:00 -0500
Subject: [PATCH] fail fast when embedding file not exists

https://github.com/elitcloud/elit/issues/32
---
 elit/nlp/dep/parser/common/data.py | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/elit/nlp/dep/parser/common/data.py b/elit/nlp/dep/parser/common/data.py
index eaeb2d5..ddbc0f8 100644
--- a/elit/nlp/dep/parser/common/data.py
+++ b/elit/nlp/dep/parser/common/data.py
@@ -154,29 +154,25 @@ def has_pret_embs(self):
     def get_pret_embs(self, word_dims=None):
         assert (self._pret_file is not None), "No pretrained file provided."
         embs = [[]] * len(self._id2word)
-        train = True
-        try:
-            with open(self._pret_file) as f:
-                dim = None
-                for line in f:
-                    line = line.strip().split()
-                    if len(line) > 2:
-                        if dim is None:
-                            dim = len(line)
-                        else:
-                            if len(line) != dim:
-                                continue
-                        word, data = line[0], line[1:]
-                        embs[self._word2id[word]] = data
-        except FileNotFoundError:
-            train = False
+        with open(self._pret_file) as f:
+            dim = None
+            for line in f:
+                line = line.strip().split()
+                if len(line) > 2:
+                    if dim is None:
+                        dim = len(line)
+                    else:
+                        if len(line) != dim:
+                            continue
+                    word, data = line[0], line[1:]
+                    embs[self._word2id[word]] = data
         if word_dims is None:
             word_dims = len(data)
         for idx, emb in enumerate(embs):
             if not emb:
                 embs[idx] = np.zeros(word_dims)
         pret_embs = np.array(embs, dtype=np.float32)
-        return pret_embs / np.std(pret_embs) if train else pret_embs
+        return pret_embs / np.std(pret_embs)
 
     def get_word_embs(self, word_dims):
         if self._pret_file is not None:
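
For reference, a minimal standalone sketch of the patched loading path, to show the behavioural change in isolation. The function name load_pret_embs, the pret_file/word2id parameters, and the in-vocabulary guard are illustrative assumptions, not the ELIT API; in data.py the method reads self._pret_file and self._word2id directly. With the try/except FileNotFoundError removed, a wrong embedding path now raises at open() instead of being silently ignored.

import numpy as np

def load_pret_embs(pret_file, word2id, word_dims=None):
    """Sketch of the patched logic: no try/except around open(), so a
    missing embedding file raises FileNotFoundError immediately."""
    embs = [[]] * len(word2id)
    data = []
    with open(pret_file) as f:  # fails fast: the error propagates to the caller
        dim = None
        for line in f:
            fields = line.strip().split()
            if len(fields) > 2:
                if dim is None:
                    dim = len(fields)
                elif len(fields) != dim:
                    continue  # skip rows whose width disagrees with the first row
                word, data = fields[0], fields[1:]
                if word in word2id:  # guard added for the sketch; data.py indexes self._word2id directly
                    embs[word2id[word]] = data
    if word_dims is None:
        word_dims = len(data)
    # words without a pretrained vector get a zero vector, as in the patch
    embs = [emb if emb else np.zeros(word_dims) for emb in embs]
    pret_embs = np.array(embs, dtype=np.float32)
    return pret_embs / np.std(pret_embs)  # always normalised; no unnormalised fallback path

# A mistyped path is now reported immediately, e.g.
# load_pret_embs("glove.6B.100d.missing.txt", {"the": 0, "cat": 1})
# raises FileNotFoundError at open() instead of quietly proceeding without
# pretrained embeddings, which is the failure reported in issue #32.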