forked from spyysalo/wvlib
-
Notifications
You must be signed in to change notification settings - Fork 0
/
convert.py
executable file
·52 lines (40 loc) · 1.71 KB
/
convert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env python
"""Convert between word vector formats."""
import sys
import wvlib
def argparser():
    """Build the command-line argument parser for the converter.

    The standard argparse module is used when it can be imported;
    otherwise compat.argparse is imported in its place.

    Returns:
        An ArgumentParser accepting the INFILE and OUTFILE positionals
        plus the -i, -n, -r and -v options.
    """
    try:
        import argparse
    except ImportError:
        import compat.argparse as argparse

    parser = argparse.ArgumentParser()

    # Positional arguments: where to read from and where to write to.
    parser.add_argument(
        'input',
        metavar='INFILE',
        help='input vector file',
    )
    parser.add_argument(
        'output',
        metavar='OUTFILE',
        help='output vector file',
    )

    # Optional arguments, kept in the original order so --help is unchanged.
    parser.add_argument(
        '-i', '--input-format',
        choices=wvlib.formats,
        default=None,
        help='input FILE format',
    )
    parser.add_argument(
        '-n', '--normalize',
        action='store_true',
        default=False,
        help='normalize vectors to unit length',
    )
    # NOTE: only wvlib output is supported at the moment, so the
    # -o/--output-format option (choices=wvlib.output_formats,
    # help='output FILE format') is intentionally not registered.
    parser.add_argument(
        '-r', '--max-rank',
        metavar='INT',
        type=int,
        default=None,
        help='only load r most frequent words',
    )
    parser.add_argument(
        '-v', '--vector-format',
        choices=wvlib.vector_formats,
        default=None,
        help='output vector format (with wvlib output)',
    )
    return parser
def main(argv=None):
    """Load a word vector file, optionally normalize it, and save it.

    Args:
        argv: full argument list with the program name at index 0;
            sys.argv is used when this is None.

    Returns:
        0 on success.

    Raises:
        ValueError: if --max-rank was given with a value below 1.
    """
    # logging is not imported at module level in this file, so bring it
    # into scope here; otherwise the --normalize path fails with NameError.
    import logging

    if argv is None:
        argv = sys.argv
    options = argparser().parse_args(argv[1:])

    # Reject a non-positive rank limit before doing any loading work.
    if options.max_rank is not None and options.max_rank < 1:
        raise ValueError('max-rank must be >= 1')

    wv = wvlib.load(options.input, options.input_format,
                    max_rank=options.max_rank)

    if options.normalize:
        logging.info('normalize vectors to unit length')
        wv.normalize()

    wv.save(options.output, vector_format=options.vector_format)

    return 0
if __name__ == '__main__':
    # Run the converter and hand main()'s return value to the
    # interpreter as the process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)