forked from sordonia/rnn-lm
-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathstate.py
executable file
·101 lines (78 loc) · 2.55 KB
/
state.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from collections import OrderedDict
def prototype_state():
    """Build and return the default configuration dictionary.

    A brand-new dict is created on every call, so callers may freely
    modify the returned object without affecting later calls.
    """
    return {
        # Seed for random number generation
        'seed': 1234,
        # Logging verbosity
        'level': 'DEBUG',
        # Placeholder token for unknown (out-of-vocabulary) words
        'oov': '<unk>',
        # Integer symbol ids -- watch out for these
        # (presumably they must agree with the dictionary in use; confirm)
        'unk_sym': 0,
        'eos_sym': 2,
        'sos_sym': 1,
        'n_samples': 40,
        # Start-of-sentence / end-of-sentence marker tokens
        'start_sym_sent': '<s>',
        'end_sym_sent': '</s>',
        # Activation of the low-rank approximation, kept as lambda source text
        'rank_n_activ': 'lambda x: x',

        # ----- SIZES -----
        # Hidden layer dimensionality
        'qdim': 512,
        # Low-rank approximation dimensionality
        'rankdim': 256,
        # Gradient clipping threshold
        'cutoff': 1.,
        'lr': 0.0001,
        # Early stopping settings
        'patience': 5,
        'cost_threshold': 1.003,

        # ----- TRAINING METHOD -----
        # Optimization algorithm
        'updater': 'adam',
        # Batch size
        'bs': 128,
        # This many minibatches are taken and merged, the sentences are
        # sorted by length, and the same number of new batches with less
        # padding is created.
        'sort_k_batches': 20,
        # Maximum sequence length / trim batches
        'seqlen': 50,
        # Whether to use a deep output layer and maxout on the outputs
        'deep_out': True,
        'maxout_out': True,
        'step_type': 'gated',
        # Kept as lambda source text; refers to a name `T` not defined here
        'rec_activation': "lambda x: T.tanh(x)",
        # Maximum number of iterations
        'max_iters': 10,
        'save_dir': './',

        # ----- TRAINING PROCESS -----
        # How often (in batches) training error is reported
        'trainFreq': 10,
        # Validation frequency
        'validFreq': 5000,
        # Number of batches to process
        'loopIters': 3000000,
        # Maximum run time in minutes (24 * 60 minutes per day, 31 days)
        'timeStop': 24 * 60 * 31,
        # Error level to stop at
        'minerr': -1,
    }
def prototype_test():
    """Return the default state adjusted for the small test configuration.

    Starts from the dict produced by prototype_state() and then adds or
    overrides the entries listed below.
    """
    overrides = {
        # Data, dictionary and output locations for the test setup
        'train_sentences': "tests/data/test.word.train.pkl",
        'valid_sentences': "tests/data/test.word.valid.pkl",
        'dictionary': "tests/data/test.dict.pkl",
        'save_dir': "tests/models/",
        'prefix': "test_",
        # Output layer options
        'deep_out': True,
        'maxout_out': False,
        # Hidden layer dimensionality
        'qdim': 5,
        # Low-rank approximation dimensionality
        'rankdim': 10,
        # Batch size and maximum sequence length
        'bs': 10,
        'seqlen': 50,
    }
    test_state = prototype_state()
    test_state.update(overrides)
    return test_state