forked from Helsinki-NLP/Tatoeba-Challenge
-
Notifications
You must be signed in to change notification settings - Fork 0
/
opus4m-2020-08-12.yml
108 lines (108 loc) · 2.94 KB
/
opus4m-2020-08-12.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# Release metadata for the opus4m transformer model packaged as
# tut-eng/opus4m-2020-08-12.zip.
# NOTE(review): "tut" is presumably a language-group code covering the
# entries under source-languages — confirm.
release: tut-eng/opus4m-2020-08-12.zip
# NOTE(review): unquoted ISO date; YAML 1.1 loaders (e.g. PyYAML safe_load)
# return a date object here rather than a string — confirm that is what
# consumers of this file expect.
release-date: 2020-08-12
dataset-name: opus4m
modeltype: transformer
# The pair in parentheses (spm32k,spm32k) repeats the source and target
# values listed under subwords.
pre-processing: normalization + SentencePiece (spm32k,spm32k)
# Subword segmentation identifier for each side; both sides use spm32k.
# Shape: a sequence of single-key mappings (one item for source, one for
# target), not a single two-key mapping.
subwords:
- source: spm32k
- target: spm32k
# Subword model names for the source and target sides, given as a sequence
# of single-key mappings.
# NOTE(review): these look like file names, presumably relative to the
# release archive — confirm.
subword-models:
- source: source.spm
- target: target.spm
# The 19 source-language codes, listed alphabetically.
# Each code has a matching Tatoeba-test.<code>-eng entry under test-data,
# BLEU-scores and chr-F-scores.
# NOTE(review): the three-letter codes look like ISO 639-3 — confirm.
source-languages:
- aze
- bak
- chv
- crh
- kaz
- kir
- kjh
- kum
- mon
- nog
- ota
- sah
- tat
- tuk
- tur
- tyv
- uig
- uzb
- xal
# Target-language codes; this release has a single target, eng.
# Kept as a one-item sequence, the same shape as source-languages.
target-languages:
- eng
# Size of each evaluation set: 20 Tatoeba-test entries (one per source
# language plus the combined "multi" set) and 4 news sets for tur-eng.
# Keys appear in the same order as in BLEU-scores and chr-F-scores.
# Values have the form A/B and load as plain strings because of the slash.
# NOTE(review): A/B is presumably <sentences>/<words> — confirm.
test-data:
- Tatoeba-test.aze-eng: 2659/13295
- Tatoeba-test.bak-eng: 39/173
- Tatoeba-test.chv-eng: 333/1876
- Tatoeba-test.crh-eng: 22/92
- Tatoeba-test.kaz-eng: 397/2176
- Tatoeba-test.kir-eng: 118/583
- Tatoeba-test.kjh-eng: 17/60
- Tatoeba-test.kum-eng: 8/41
- Tatoeba-test.mon-eng: 337/2256
- Tatoeba-test.multi-eng: 3070/15499
- Tatoeba-test.nog-eng: 83/373
- Tatoeba-test.ota-eng: 678/3612
- Tatoeba-test.sah-eng: 39/183
- Tatoeba-test.tat-eng: 1451/9447
- Tatoeba-test.tuk-eng: 2500/16260
- Tatoeba-test.tur-eng: 10000/66526
- Tatoeba-test.tyv-eng: 5/20
- Tatoeba-test.uig-eng: 3024/19231
- Tatoeba-test.uzb-eng: 457/2184
- Tatoeba-test.xal-eng: 281/1726
- newsdev2016-entr.tur-eng: 1001/19474
- newstest2016-entr.tur-eng: 3000/58396
- newstest2017-entr.tur-eng: 3007/59741
- newstest2018-entr.tur-eng: 3000/60232
# BLEU score for each evaluation set; keys and their order mirror test-data.
# Values in this file range from 0.1 (uig-eng) to 28.6 (Tatoeba tur-eng).
# NOTE(review): scores appear to be on a 0-100 scale — confirm.
# NOTE(review): newstest2016 and newstest2017 both report 4.6; verify this
# is a genuine tie and not a copy error.
BLEU-scores:
- Tatoeba-test.aze-eng: 13.2
- Tatoeba-test.bak-eng: 2.9
- Tatoeba-test.chv-eng: 0.8
- Tatoeba-test.crh-eng: 14.6
- Tatoeba-test.kaz-eng: 11.8
- Tatoeba-test.kir-eng: 15.1
- Tatoeba-test.kjh-eng: 1.7
- Tatoeba-test.kum-eng: 14.6
- Tatoeba-test.mon-eng: 6.4
- Tatoeba-test.multi-eng: 10.6
- Tatoeba-test.nog-eng: 9.0
- Tatoeba-test.ota-eng: 1.4
- Tatoeba-test.sah-eng: 0.7
- Tatoeba-test.tat-eng: 4.6
- Tatoeba-test.tuk-eng: 5.5
- Tatoeba-test.tur-eng: 28.6
- Tatoeba-test.tyv-eng: 5.5
- Tatoeba-test.uig-eng: 0.1
- Tatoeba-test.uzb-eng: 5.1
- Tatoeba-test.xal-eng: 1.4
- newsdev2016-entr.tur-eng: 5.9
- newstest2016-entr.tur-eng: 4.6
- newstest2017-entr.tur-eng: 4.6
- newstest2018-entr.tur-eng: 4.9
# chr-F score for each evaluation set; keys and their order mirror test-data.
# Values in this file lie between 0.099 (kjh-eng) and 0.459 (Tatoeba tur-eng).
# The values are unquoted, so they load as floats and trailing zeros
# (e.g. 0.190) are not preserved after parsing.
# NOTE(review): newstest2016 and newstest2017 both report 0.246; verify this
# is a genuine tie and not a copy error.
chr-F-scores:
- Tatoeba-test.aze-eng: 0.322
- Tatoeba-test.bak-eng: 0.190
- Tatoeba-test.chv-eng: 0.162
- Tatoeba-test.crh-eng: 0.322
- Tatoeba-test.kaz-eng: 0.302
- Tatoeba-test.kir-eng: 0.337
- Tatoeba-test.kjh-eng: 0.099
- Tatoeba-test.kum-eng: 0.314
- Tatoeba-test.mon-eng: 0.251
- Tatoeba-test.multi-eng: 0.296
- Tatoeba-test.nog-eng: 0.263
- Tatoeba-test.ota-eng: 0.150
- Tatoeba-test.sah-eng: 0.118
- Tatoeba-test.tat-eng: 0.228
- Tatoeba-test.tuk-eng: 0.256
- Tatoeba-test.tur-eng: 0.459
- Tatoeba-test.tyv-eng: 0.190
- Tatoeba-test.uig-eng: 0.113
- Tatoeba-test.uzb-eng: 0.208
- Tatoeba-test.xal-eng: 0.150
- newsdev2016-entr.tur-eng: 0.259
- newstest2016-entr.tur-eng: 0.246
- newstest2017-entr.tur-eng: 0.246
- newstest2018-entr.tur-eng: 0.247