forked from Helsinki-NLP/Tatoeba-Challenge
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathopus-2020-07-27.yml
121 lines (121 loc) · 3.01 KB
/
opus-2020-07-27.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Top-level metadata describing one packaged model release.
# Archive path of the release.
# NOTE(review): the directory part looks like a source-target language(-group)
# pair — confirm.
release: eng-trk/opus-2020-07-27.zip
# NOTE(review): unquoted ISO date — YAML 1.1 loaders resolve this to a date
# object rather than a string; confirm which type consumers expect.
release-date: 2020-07-27
# Name of the training data set; it also appears in the archive file name above.
dataset-name: opus
# Model architecture label.
modeltype: transformer
# Free-text description of the pre-processing pipeline. The pair in
# parentheses matches the source/target values listed under `subwords`.
pre-processing: normalization + SentencePiece (spm32k,spm32k)
# Subword segmentation scheme for each side of the model, written as a
# sequence of single-key mappings (one entry for source, one for target).
subwords:
- source: spm32k
- target: spm32k
# File names of the subword models, in the same source/target layout.
# NOTE(review): presumably paths relative to the release archive root — confirm.
subword-models:
- source: source.spm
- target: target.spm
# Languages on the input side, as three-letter codes
# (presumably ISO 639-3 — confirm). This release lists a single one.
source-languages:
- eng
# Languages on the output side, as three-letter codes in alphabetical order
# (presumably ISO 639-3 — confirm). Script variants are not distinguished
# here; the script-qualified forms appear under `use-target-labels`.
target-languages:
- aze
- bak
- chv
- crh
- kaz
- kir
- kjh
- kum
- ota
- sah
- tat
- tuk
- tur
- tyv
- uig
- uzb
# Target-language label tokens in the literal form `>>code<<` or
# `>>code_Script<<`.
# NOTE(review): presumably these tokens are prefixed to the input text to
# select the output language/script — confirm against the consumer.
#
# Every label MUST stay quoted: a scalar that begins with `>` is read by YAML
# as a folded block-scalar header, so an unquoted `- >>aze_Latn<<` is a syntax
# error that prevents the whole document from loading. Single quotes keep the
# token text exactly as written (no escape processing).
use-target-labels:
- '>>aze_Latn<<'
- '>>bak<<'
- '>>chv<<'
- '>>crh<<'
- '>>crh_Latn<<'
- '>>kaz_Cyrl<<'
- '>>kaz_Latn<<'
- '>>kir_Cyrl<<'
- '>>kjh<<'
- '>>kum<<'
- '>>ota_Arab<<'
- '>>ota_Latn<<'
- '>>sah<<'
- '>>tat<<'
- '>>tat_Arab<<'
- '>>tat_Latn<<'
- '>>tuk<<'
- '>>tuk_Latn<<'
- '>>tur<<'
- '>>tyv<<'
- '>>uig_Arab<<'
- '>>uig_Cyrl<<'
- '>>uzb_Cyrl<<'
- '>>uzb_Latn<<'
# Evaluation sets and their sizes, one single-key mapping per set.
# The same set names are used as keys under `BLEU-scores` and `chr-F-scores`.
# Each value is an `A/B` pair of integers, which YAML loads as a plain string.
# NOTE(review): presumably A = number of sentences and B = number of
# words/tokens — confirm against the tool that generates this file.
test-data:
- Tatoeba-test.eng-aze: 2659/10046
- Tatoeba-test.eng-bak: 39/140
- Tatoeba-test.eng-chv: 333/1358
- Tatoeba-test.eng-crh: 22/81
- Tatoeba-test.eng-kaz: 397/1668
- Tatoeba-test.eng-kir: 118/428
- Tatoeba-test.eng-kjh: 17/48
- Tatoeba-test.eng-kum: 8/25
- Tatoeba-test.eng-multi: 10000/46183
- Tatoeba-test.eng-ota: 678/3328
- Tatoeba-test.eng-sah: 39/131
- Tatoeba-test.eng-tat: 1451/6996
- Tatoeba-test.eng-tuk: 2500/12809
- Tatoeba-test.eng-tur: 10000/49076
- Tatoeba-test.eng-tyv: 5/19
- Tatoeba-test.eng-uig: 3024/13084
- Tatoeba-test.eng-uzb: 457/1514
- newsdev2016-entr.eng-tur: 1001/14044
- newstest2016-entr.eng-tur: 3000/44195
- newstest2017-entr.eng-tur: 3007/45049
- newstest2018-entr.eng-tur: 3000/45944
# BLEU score per evaluation set, one single-key mapping per set.
# There is one entry for every set name listed under `test-data`, in the
# same order. Values are unquoted decimals, so they load as floats.
BLEU-scores:
- Tatoeba-test.eng-aze: 26.0
- Tatoeba-test.eng-bak: 9.2
- Tatoeba-test.eng-chv: 3.9
- Tatoeba-test.eng-crh: 7.6
- Tatoeba-test.eng-kaz: 10.4
- Tatoeba-test.eng-kir: 26.9
- Tatoeba-test.eng-kjh: 2.0
- Tatoeba-test.eng-kum: 2.7
- Tatoeba-test.eng-multi: 18.8
- Tatoeba-test.eng-ota: 0.4
- Tatoeba-test.eng-sah: 0.7
- Tatoeba-test.eng-tat: 9.6
- Tatoeba-test.eng-tuk: 5.5
- Tatoeba-test.eng-tur: 33.4
- Tatoeba-test.eng-tyv: 3.6
- Tatoeba-test.eng-uig: 0.1
- Tatoeba-test.eng-uzb: 3.3
- newsdev2016-entr.eng-tur: 9.5
- newstest2016-entr.eng-tur: 8.0
- newstest2017-entr.eng-tur: 7.8
- newstest2018-entr.eng-tur: 8.2
# chr-F score per evaluation set, one single-key mapping per set.
# There is one entry for every set name listed under `test-data`, in the
# same order. All values here are fractions below 1, written with three
# decimals. They are unquoted, so they load as floats and a trailing zero
# (e.g. 0.320) is not preserved after parsing.
chr-F-scores:
- Tatoeba-test.eng-aze: 0.568
- Tatoeba-test.eng-bak: 0.320
- Tatoeba-test.eng-chv: 0.266
- Tatoeba-test.eng-crh: 0.347
- Tatoeba-test.eng-kaz: 0.352
- Tatoeba-test.eng-kir: 0.508
- Tatoeba-test.eng-kjh: 0.052
- Tatoeba-test.eng-kum: 0.073
- Tatoeba-test.eng-multi: 0.447
- Tatoeba-test.eng-ota: 0.064
- Tatoeba-test.eng-sah: 0.028
- Tatoeba-test.eng-tat: 0.309
- Tatoeba-test.eng-tuk: 0.309
- Tatoeba-test.eng-tur: 0.617
- Tatoeba-test.eng-tyv: 0.125
- Tatoeba-test.eng-uig: 0.152
- Tatoeba-test.eng-uzb: 0.268
- newsdev2016-entr.eng-tur: 0.423
- newstest2016-entr.eng-tur: 0.397
- newstest2017-entr.eng-tur: 0.394
- newstest2018-entr.eng-tur: 0.396