-
Notifications
You must be signed in to change notification settings - Fork 1
/
config_example.yaml
57 lines (57 loc) · 1.82 KB
/
config_example.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Example configuration: sets mode "train", a cache folder, one corpus,
# train/validation query files, and a pipeline that lists two entries
# (BM25_ES and DeepRank).
#
# NOTE(review): every line in this copy starts at column 0. YAML block
# nesting is defined by indentation, so the child keys of corpora, queries
# and each pipeline entry need their indentation restored before this file
# will load with the intended structure.
#
# Paths are double-quoted with escaped backslashes ("\\" decodes to a single
# "\"), i.e. Windows-style separators; the trailing comments on some of those
# lines show forward-slash alternatives.
mode: "train"
cache_folder: "data" # e.g. "/backup/IR/cache"
corpora:
name: "small_bioasq"
folder: "data\\pubmed_small.tar.gz" # e.g. /backup/pubmed_archive_json/pubmed_small.tar.gz
files_are_compressed: true # (optional) default is false
queries:
train_file: "data\\small_train.json" # e.g. /backup/BioASQ-training7b
validation_file: "data\\small_validation.json"
pipeline:
- BM25_ES:
top_k: 2500
# "<ip:port>" is a placeholder to be replaced with a real host:port
# (presumably the search backend used by BM25_ES -- confirm).
address: "<ip:port>"
tokenizer:
Regex:
n_process: 20
stem: true
evaluation: true
- DeepRank:
top_k: 10
evaluation: true
tokenizer:
Regex:
n_process: 20
stem: false
sw_file: "tokenizers\\stop_words.json"
queries_sw: true # sw = stop words (setting for queries)
articles_sw: false # sw = stop words (setting for articles)
embedding:
FastText:
trainable: false
path: "data\\fast_text.bin"
input_network:
Q: 13 # maximum number of query tokens
P: 5 # maximum number of snippets per query token
S: 15 # maximum number of snippet tokens
measure_network:
MeasureNetwork:
activation: "selu"
filters: 100
# NOTE(review): capitalised False here, while every other boolean in this
# file is lowercase true/false -- consider normalising to lowercase.
gru_bidirectional: False
gru_dim: 56
kernel:
- 3 # x
- 3 # y
aggregation_network:
AggregationNetwork:
activation: "selu"
hyperparameters:
epoch: 120
batch_size: 128
optimizer:
name: "adadelta" # (optional) default is AdaDelta
learning_rate: 2
l2_regularization: 0.0001 # (optional) default is 0.0001
num_partially_positive_samples: 3
num_negative_samples: 4