-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathmain.lua
67 lines (59 loc) · 2.05 KB
/
main.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
-- Implementation of fasttext(https://arxiv.org/abs/1607.01759) using Torch
-- Author: Junwei Pan, Yahoo Inc.
-- Date: Aug 2, 2016
require("io")
require("os")
require("paths")
require("torch")
dofile("fasttext.lua")
-- Default configuration; every field here can be overridden from the
-- command line (see the cmd:option registrations below).
config = {
  corpus_train = "data/ag_news_csv/train.csv", -- training data path
  corpus_test  = "data/ag_news_csv/test.csv",  -- test data path
  dim          = 10,    -- dimensionality of word embeddings
  minfreq      = 10,    -- threshold for vocab frequency
  lr           = 0.5,   -- initial learning rate
  decay        = 0,     -- whether to decay the learning rate
  min_lr       = 0.001, -- min learning rate
  epochs       = 5,     -- number of epochs to train
  stream       = 0,     -- 1 = stream from hard drive, 0 = copy to memory first
  n_gram       = 1,     -- n_gram: 1 for unigram, 2 for bigram, 3 for trigram
  suffix       = "",    -- suffix for model id
  title        = 1,     -- whether to use title
  description  = 1,     -- whether to use description
}
-- Parse input arguments: register one CLI flag per config field (keeping the
-- original registration order for the generated help text), then overwrite
-- the defaults with whatever the user supplied.
cmd = torch.CmdLine()
local option_names = {
  "corpus_train", "corpus_test", "minfreq", "dim", "lr", "decay",
  "min_lr", "epochs", "stream", "suffix", "n_gram", "title", "description",
}
for _, name in ipairs(option_names) do
  cmd:option("-" .. name, config[name])
end
params = cmd:parse(arg)
for key, value in pairs(params) do
  config[key] = value
end
-- Echo the effective configuration (iteration order is unspecified).
for key, value in pairs(config) do
  print(key .. ": " .. value)
end
-- Train model: build the vocabulary from the training corpus, then run a
-- fixed number of epochs, evaluating on the held-out test set after each.
m = FastText(config)
m:build_vocab(config.corpus_train)
--m:build_table()
for k = 1, config.epochs do
  m.lr = config.lr -- reset learning rate at each epoch
  m:train_model(config.corpus_train)
  m:test_model(config.corpus_test)
end
-- Save the model as well as the word vectors.
-- Ensure the output directory exists before saving; otherwise the save
-- would fail only after the (potentially long) training run completes.
-- paths.mkdir is a no-op if the directory is already present.
paths.mkdir("model")
path_model = 'model/model_' .. config.suffix
path_vector = 'model/vector_' .. config.suffix
m:save_model(path_model)
m:save_vector(path_vector)