# opus2m-2020-08-01.yml
# Forked from Helsinki-NLP/Tatoeba-Challenge.
# Release metadata for the opus2m model package.
# `release` is the path of the zip archive this file describes.
release: eng-inc/opus2m-2020-08-01.zip
# NOTE(review): unquoted ISO date — YAML 1.1 loaders type this as a date
# object rather than a string. Confirm what consumers expect before quoting.
release-date: 2020-08-01
dataset-name: opus2m
modeltype: transformer
# The two names in parentheses equal the source/target values listed under
# `subwords` — presumably in (source,target) order; confirm.
pre-processing: normalization + SentencePiece (spm32k,spm32k)
# Subword scheme per side, written as a sequence of single-key mappings
# (one item for `source`, one for `target`), not as a single mapping.
subwords:
- source: spm32k
- target: spm32k
# Subword model file name per side, same sequence-of-mappings shape.
# NOTE(review): presumably paths relative to the release archive — confirm.
subword-models:
- source: source.spm
- target: target.spm
# Source side of the model: a single language code.
source-languages:
- eng
# Target side of the model: 19 three-letter language codes, sorted
# alphabetically. The list has the same length and order as
# `use-target-labels`, where some codes additionally carry a script suffix.
target-languages:
- asm
- awa
- ben
- bho
- gom
- guj
- hif
- hin
- mai
- mar
- npi
- ori
- pan
- pnb
- rom
- san
- sin
- snd
- urd
# Target-language labels, one per entry of `target-languages` and in the same
# order. Four labels carry a script suffix: hif_Latn, pan_Guru, san_Deva,
# snd_Arab.
# NOTE(review): presumably these tokens are prepended to the source sentence
# to select the output language — confirm against the model documentation.
#
# Every label is single-quoted on purpose: `>` is the YAML folded-block-scalar
# indicator, so an unquoted `>>asm<<` is read as a block-scalar header followed
# by invalid text and the whole document fails to parse. Single quotes keep the
# string value exactly `>>asm<<` (no escape processing).
use-target-labels:
- '>>asm<<'
- '>>awa<<'
- '>>ben<<'
- '>>bho<<'
- '>>gom<<'
- '>>guj<<'
- '>>hif_Latn<<'
- '>>hin<<'
- '>>mai<<'
- '>>mar<<'
- '>>npi<<'
- '>>ori<<'
- '>>pan_Guru<<'
- '>>pnb<<'
- '>>rom<<'
- '>>san_Deva<<'
- '>>sin<<'
- '>>snd_Arab<<'
- '>>urd<<'
# Evaluation sets, keyed `<test-set>.<source>-<target>`; the same 24 keys, in
# the same order, appear under `BLEU-scores` and `chr-F-scores`.
# Each value is a plain string of the form `N/M` (the `/` keeps it from being
# typed as a number).
# NOTE(review): presumably N = sentences and M = words — confirm.
# NOTE(review): the codes `kok`, `lah` and `nep` used here do not appear in
# `target-languages` (which lists `gom`, `pnb`, `npi` instead) — presumably
# macrolanguage codes for the same test sets; TODO confirm.
# `eng-multi` has no single target language in its key.
test-data:
- Tatoeba-test.eng-asm: 117/530
- Tatoeba-test.eng-awa: 279/1087
- Tatoeba-test.eng-ben: 2500/10942
- Tatoeba-test.eng-bho: 42/224
- Tatoeba-test.eng-guj: 154/680
- Tatoeba-test.eng-hif: 36/177
- Tatoeba-test.eng-hin: 5000/31361
- Tatoeba-test.eng-kok: 1/5
- Tatoeba-test.eng-lah: 32/180
- Tatoeba-test.eng-mai: 8/16
- Tatoeba-test.eng-mar: 10000/47665
- Tatoeba-test.eng-multi: 10000/53001
- Tatoeba-test.eng-nep: 115/398
- Tatoeba-test.eng-ori: 33/190
- Tatoeba-test.eng-pan: 87/570
- Tatoeba-test.eng-rom: 671/4283
- Tatoeba-test.eng-san: 144/366
- Tatoeba-test.eng-sin: 45/189
- Tatoeba-test.eng-snd: 4/17
- Tatoeba-test.eng-urd: 1663/12106
- newsdev2014.eng-hin: 520/9310
- newsdev2019-engu.eng-guj: 1998/34842
- newstest2014-hien.eng-hin: 2507/57037
- newstest2019-engu.eng-guj: 998/19483
# BLEU score per evaluation set; keys and order match `test-data`.
# Values are unquoted, so loaders type them as floats.
# Several sets listed in `test-data` are very small (e.g. eng-kok 1/5,
# eng-snd 4/17, eng-mai 8/16), so read their scores with that in mind.
BLEU-scores:
- Tatoeba-test.eng-asm: 2.7
- Tatoeba-test.eng-awa: 0.5
- Tatoeba-test.eng-ben: 16.7
- Tatoeba-test.eng-bho: 4.3
- Tatoeba-test.eng-guj: 17.5
- Tatoeba-test.eng-hif: 0.6
- Tatoeba-test.eng-hin: 17.7
- Tatoeba-test.eng-kok: 1.7
- Tatoeba-test.eng-lah: 0.3
- Tatoeba-test.eng-mai: 15.6
- Tatoeba-test.eng-mar: 21.3
- Tatoeba-test.eng-multi: 17.3
- Tatoeba-test.eng-nep: 0.8
- Tatoeba-test.eng-ori: 2.2
- Tatoeba-test.eng-pan: 8.0
- Tatoeba-test.eng-rom: 0.4
- Tatoeba-test.eng-san: 0.5
- Tatoeba-test.eng-sin: 9.1
- Tatoeba-test.eng-snd: 4.4
- Tatoeba-test.eng-urd: 13.3
- newsdev2014.eng-hin: 8.2
- newsdev2019-engu.eng-guj: 6.5
- newstest2014-hien.eng-hin: 11.4
- newstest2019-engu.eng-guj: 7.2
# chr-F score per evaluation set; keys and order match `test-data` and
# `BLEU-scores`. All values here lie between 0.000 and 0.477.
# Values are unquoted, so loaders type them as floats and the three-decimal
# formatting is not preserved on load (e.g. 0.470 loads as 0.47).
chr-F-scores:
- Tatoeba-test.eng-asm: 0.277
- Tatoeba-test.eng-awa: 0.132
- Tatoeba-test.eng-ben: 0.470
- Tatoeba-test.eng-bho: 0.227
- Tatoeba-test.eng-guj: 0.373
- Tatoeba-test.eng-hif: 0.028
- Tatoeba-test.eng-hin: 0.469
- Tatoeba-test.eng-kok: 0.000
- Tatoeba-test.eng-lah: 0.028
- Tatoeba-test.eng-mai: 0.429
- Tatoeba-test.eng-mar: 0.477
- Tatoeba-test.eng-multi: 0.448
- Tatoeba-test.eng-nep: 0.081
- Tatoeba-test.eng-ori: 0.208
- Tatoeba-test.eng-pan: 0.347
- Tatoeba-test.eng-rom: 0.197
- Tatoeba-test.eng-san: 0.108
- Tatoeba-test.eng-sin: 0.364
- Tatoeba-test.eng-snd: 0.284
- Tatoeba-test.eng-urd: 0.423
- newsdev2014.eng-hin: 0.342
- newsdev2019-engu.eng-guj: 0.293
- newstest2014-hien.eng-hin: 0.364
- newstest2019-engu.eng-guj: 0.296