# dvc.yaml
# Pipeline-local template variables, referenced elsewhere in this file as
# ${name}.
vars:
  # CSV splits (declared as outputs of the prepare-data stage).
  - final-data-train: data/train.csv
  - final-data-eval: data/eval.csv
  - final-data-test: data/test.csv

  # Plain-text original/corrected files passed to errant_parallel.
  - final-data-eval-orig: data/eval-input.txt
  - final-data-eval-cor: data/eval-target.txt
  - final-data-test-orig: data/test-input.txt
  # NOTE(review): not referenced by any stage visible in this file.
  - final-data-test-cor: data/test-target.txt

  # Model artifacts. The trailing slash on models-dir is part of the value.
  - models-dir: models/
  - final-model: models/best_model/pytorch_model.bin

  # Prediction, edit and metric files.
  - final-predictions: outputs/predictions.txt
  - edits-hyp: outputs/edits-hyp.txt
  - edits-gold: data/edits-gold.txt
  - results: results.json
# Pipeline stages. ${...} placeholders are resolved by DVC templating from the
# `vars` section of this file; names not defined there (model_name,
# batch_size, learning_rate, epochs, task_prefix) are presumably supplied by
# the default params file -- confirm against params.yaml.
stages:
  # Runs src.prepare_dataset and declares the three CSV splits as outputs.
  prepare-data:
    cmd: python -m src.prepare_dataset
    deps:
      - src/prepare_dataset.py
      - src/data.py
    outs:
      - ${final-data-train}
      - ${final-data-eval}
      - ${final-data-test}

  # Builds the gold edit file by running errant_parallel on the eval
  # original/corrected pair.
  prepare-data-errant-eval:
    cmd: >-
      errant_parallel
      -orig ${final-data-eval-orig}
      -cor ${final-data-eval-cor}
      -out ${edits-gold}
    deps:
      - ${final-data-eval-orig}
      - ${final-data-eval-cor}
    outs:
      - ${edits-gold}

  # Runs src.train on the train/eval CSVs and declares the model file as the
  # stage output.
  train:
    # Folded scalar: the lines below are joined with single spaces into one
    # command line.
    cmd: >-
      python -m src.train
      --models_dir=${models-dir}
      --train_csv=${final-data-train}
      --eval_csv=${final-data-eval}
      --edits_gold=${edits-gold}
      --model_name=${model_name}
      --batch_size=${batch_size}
      --learning_rate=${learning_rate}
      --num_train_epochs=${epochs}
      --task_prefix ${task_prefix}
    params:
      - model_name
      - batch_size
      - learning_rate
      - epochs
      # Interpolated into cmd above, so declared here like the other
      # interpolated parameters.
      - task_prefix
    deps:
      - src/train.py
      - ${final-data-train}
      - ${final-data-eval}
      - ${edits-gold}
    outs:
      - ${final-model}

  # Runs src.tag on the test CSV and writes the predictions file.
  tag:
    # The model directory is derived from models-dir (whose value ends with
    # "/") so that it is the directory holding the ${final-model} dependency
    # declared below, instead of a separate hard-coded path.
    cmd: >-
      python -m src.tag
      --model_name ${models-dir}best_model/
      --test_csv ${final-data-test}
      --task_prefix ${task_prefix}
      --out_path ${final-predictions}
    params:
      # Interpolated into cmd above; declared so it is listed as a parameter
      # dependency of this stage.
      - task_prefix
    deps:
      - src/tag.py
      - ${final-model}
      - ${final-data-test}
    outs:
      - ${final-predictions}

  # Extracts hypothesis edits from the predictions, compares them with the
  # gold edits, writes the result as JSON and prints it.
  evaluate:
    # NOTE(review): the hypothesis edits come from the test originals, while
    # ${edits-gold} is built from the eval pair by prepare-data-errant-eval.
    # Confirm that comparing the two is intended.
    cmd: >-
      errant_parallel
      -orig ${final-data-test-orig}
      -cor ${final-predictions}
      -out ${edits-hyp}
      && errant_compare
      -hyp ${edits-hyp}
      -ref ${edits-gold}
      -cat 2
      -json_path ${results}
      && cat ${results}
    deps:
      - ${edits-gold}
      - ${final-data-test-orig}
      - ${final-predictions}
    outs:
      - ${edits-hyp}
    metrics:
      # Metrics file is excluded from the DVC cache.
      - ${results}:
          cache: false