Skip to content

Commit

Permalink
fail fast when embedding file does not exist #32
Browse files Browse the repository at this point in the history
  • Loading branch information
hankcs committed Nov 28, 2018
1 parent fd959c5 commit 92a26c2
Showing 1 changed file with 13 additions and 17 deletions.
30 changes: 13 additions & 17 deletions elit/nlp/dep/parser/common/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,29 +154,25 @@ def has_pret_embs(self):
def get_pret_embs(self, word_dims=None):
    """Load pretrained word embeddings from ``self._pret_file``.

    The file is read line by line; a line is accepted as an embedding row
    only when it has more than two whitespace-separated fields (the token
    followed by at least two vector components), which also skips the
    two-field ``"<vocab_size> <dim>"`` header of word2vec-format files.
    The first accepted line fixes the expected field count; later lines
    with a different count are skipped as corrupt.  Vocabulary words that
    never appear in the file receive a zero vector.  The final matrix is
    scaled by its global standard deviation.

    Args:
        word_dims: embedding dimensionality; when ``None`` it is inferred
            from the file contents.

    Returns:
        ``np.ndarray`` of shape ``(len(self._id2word), word_dims)`` with
        dtype ``float32``.

    Raises:
        AssertionError: if no pretrained file was configured.
        ValueError: if ``word_dims`` is ``None`` and the file contains no
            valid embedding line, so the dimensionality cannot be inferred.
    """
    assert (self._pret_file is not None), "No pretrained file provided."
    embs = [[]] * len(self._id2word)
    dim = None  # field count of the first valid line; fixes the row width
    with open(self._pret_file) as f:
        for raw in f:
            fields = raw.strip().split()
            if len(fields) <= 2:
                continue  # header / blank / malformed line
            if dim is None:
                dim = len(fields)
            elif len(fields) != dim:
                continue  # corrupt row: wrong number of components
            word, data = fields[0], fields[1:]
            # NOTE(review): assumes every word in the file is present in
            # _word2id (an out-of-vocabulary word raises KeyError) —
            # confirm the vocabulary is built from this same file.
            embs[self._word2id[word]] = data
    if word_dims is None:
        if dim is None:
            # Previously this path crashed with a NameError on `data`;
            # fail fast with an explicit message instead.
            raise ValueError(
                "Cannot infer embedding dimensionality: no valid line in %s"
                % self._pret_file)
        word_dims = dim - 1  # field count minus the leading token
    for idx, emb in enumerate(embs):
        if not emb:
            # Zero vector for vocabulary words missing from the file.
            embs[idx] = np.zeros(word_dims)
    pret_embs = np.array(embs, dtype=np.float32)
    # Normalize the whole matrix to unit standard deviation.
    return pret_embs / np.std(pret_embs)

def get_word_embs(self, word_dims):
if self._pret_file is not None:
Expand Down

0 comments on commit 92a26c2

Please sign in to comment.