forked from Helsinki-NLP/Tatoeba-Challenge
-
Notifications
You must be signed in to change notification settings - Fork 0
/
opus-2021-02-18.yml
108 lines (108 loc) · 3.29 KB
/
opus-2021-02-18.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# Release metadata for this model package.
release: fiu-sit/opus-2021-02-18.zip
# NOTE(review): unquoted ISO date; YAML 1.1 loaders resolve this to a date
# object rather than a string. Confirm which type consumers expect.
release-date: 2021-02-18
dataset-name: opus
modeltype: transformer
# The pair in parentheses matches the source/target entries under `subwords`.
pre-processing: normalization + SentencePiece (spm32k,spm32k)
# `subwords` and `subword-models` are sequences of single-key mappings
# (one item for the source side, one for the target side), not plain mappings.
subwords:
- source: spm32k
- target: spm32k
subword-models:
- source: source.spm
- target: target.spm
# Target-language label tokens, one per target variant.
# Each label is single-quoted: a plain scalar cannot start with '>', because
# that character introduces a folded block scalar and unquoted `>>xxx<<`
# makes strict YAML loaders reject the document.
# NOTE(review): presumably these tokens are prepended to the input text to
# select the output language; confirm against the model's usage notes.
use-target-labels:
- '>>cmn<<'
- '>>cmn_Hans<<'
- '>>cmn_Hant<<'
- '>>nan<<'
- '>>yue_Hans<<'
- '>>yue_Hant<<'
# Language codes on the source side of the model.
source-languages:
- fin
- hun
- mhr
# Language codes on the target side. Only the base codes are listed here;
# script-specific variants such as cmn_Hans / cmn_Hant appear in the label
# and data sections of this file.
target-languages:
- cmn
- nan
- yue
# One item per source-target pair: corpus name followed by a number in
# parentheses.
# NOTE(review): the number is presumably the count of training examples for
# that pair, and the repeated 1000000 looks like a per-pair cap; confirm.
# There is no entry for mhr-cmn or mhr-yue_Hans in this list.
training-data:
- fin-cmn: Tatoeba-train (187275)
- fin-cmn_Hans: Tatoeba-train (1000000)
- fin-cmn_Hant: Tatoeba-train (1000000)
- fin-nan: Tatoeba-train (57)
- fin-yue_Hans: Tatoeba-train (22)
- fin-yue_Hant: Tatoeba-train (5518)
- hun-cmn: Tatoeba-train (225861)
- hun-cmn_Hans: Tatoeba-train (1000000)
- hun-cmn_Hant: Tatoeba-train (1000000)
- hun-nan: Tatoeba-train (17)
- hun-yue_Hans: Tatoeba-train (30)
- hun-yue_Hant: Tatoeba-train (5754)
- mhr-cmn_Hans: Tatoeba-train (7)
- mhr-cmn_Hant: Tatoeba-train (7)
- mhr-nan: Tatoeba-train (1)
- mhr-yue_Hant: Tatoeba-train (6)
# One item per language pair: corpus name, then a count after the comma.
# Pair keys here are not written source-first (e.g. `cmn-fin`, whereas
# training-data uses `fin-cmn`).
# The last two items are summary notes rather than per-pair entries; the
# final one is a plain string, not a mapping.
# NOTE(review): the per-pair counts listed here add up to 2957, which does
# not match the stated total of 2888; confirm which figure is correct.
validation-data:
- cmn-fin: Tatoeba-dev, 15
- cmn_Hans-fin: Tatoeba-dev, 584
- cmn_Hant-fin: Tatoeba-dev, 400
- fin-yue_Hant: Tatoeba-dev, 1
- cmn-hun: Tatoeba-dev, 17
- cmn_Hans-hun: Tatoeba-dev, 561
- cmn_Hant-hun: Tatoeba-dev, 420
- hun-yue_Hant: Tatoeba-dev, 2
- cmn-mhr: Tatoeba-dev, 14
- cmn_Hans-mhr: Tatoeba-dev, 312
- cmn_Hant-mhr: Tatoeba-dev, 362
- mhr-nan: Tatoeba-dev, 19
- mhr-yue_Hans: Tatoeba-dev, 5
- mhr-yue_Hant: Tatoeba-dev, 245
- total size of shuffled dev data: 2888
- devset = top 2888 lines of Tatoeba-dev.src.shuffled!
# One item per test set, valued as `A/B`.
# NOTE(review): A/B is presumably sentences/words; confirm.
# The aggregate rows equal the sums of their component rows on both sides of
# the slash: fin-zho = all fin-* rows, hun-zho = all hun-* rows,
# chm-zho = the two mhr-* rows, multi-multi = fin-zho + hun-zho + chm-zho.
# NOTE(review): `chm` is used here where the rest of the file uses `mhr`;
# presumably the macrolanguage code. Confirm.
test-data:
- Tatoeba-test.chm-zho: 12/102
- Tatoeba-test.fin-cmn_Hans: 173/1394
- Tatoeba-test.fin-cmn_Hant: 173/1285
- Tatoeba-test.fin-cmn: 1/7
- Tatoeba-test.fin-nan: 1/9
- Tatoeba-test.fin-yue_Hans: 17/179
- Tatoeba-test.fin-yue_Hant: 15/85
- Tatoeba-test.fin-zho: 380/2959
- Tatoeba-test.hun-cmn_Hans: 247/2563
- Tatoeba-test.hun-cmn_Hant: 185/1714
- Tatoeba-test.hun-cmn: 1/7
- Tatoeba-test.hun-zho: 433/4284
- Tatoeba-test.mhr-cmn_Hans: 4/28
- Tatoeba-test.mhr-cmn_Hant: 8/74
- Tatoeba-test.multi-multi: 825/7345
# One BLEU value per test set, using the same keys in the same order as
# `test-data`.
# NOTE(review): values appear to be on a 0-100 scale; confirm. Scores for
# the smallest test sets (e.g. hun-cmn, listed as 1/7 under test-data) rest
# on very little data and should be read with caution.
BLEU-scores:
- Tatoeba-test.chm-zho: 0.4
- Tatoeba-test.fin-cmn_Hans: 32.4
- Tatoeba-test.fin-cmn_Hant: 33.9
- Tatoeba-test.fin-cmn: 7.3
- Tatoeba-test.fin-nan: 1.7
- Tatoeba-test.fin-yue_Hans: 1.2
- Tatoeba-test.fin-yue_Hant: 2.1
- Tatoeba-test.fin-zho: 29.7
- Tatoeba-test.hun-cmn_Hans: 26.2
- Tatoeba-test.hun-cmn_Hant: 23.6
- Tatoeba-test.hun-cmn: 43.0
- Tatoeba-test.hun-zho: 25.2
- Tatoeba-test.mhr-cmn_Hans: 1.5
- Tatoeba-test.mhr-cmn_Hant: 0.4
- Tatoeba-test.multi-multi: 26.8
# One chr-F value per test set, using the same keys in the same order as
# `test-data` and `BLEU-scores`. All values listed fall between 0 and 1.
# NOTE(review): trailing zeros (e.g. 0.070, 0.000) are not preserved when
# these unquoted values are loaded as floats; confirm that is acceptable.
chr-F-scores:
- Tatoeba-test.chm-zho: 0.012
- Tatoeba-test.fin-cmn_Hans: 0.255
- Tatoeba-test.fin-cmn_Hant: 0.272
- Tatoeba-test.fin-cmn: 0.070
- Tatoeba-test.fin-nan: 0.000
- Tatoeba-test.fin-yue_Hans: 0.030
- Tatoeba-test.fin-yue_Hant: 0.042
- Tatoeba-test.fin-zho: 0.234
- Tatoeba-test.hun-cmn_Hans: 0.225
- Tatoeba-test.hun-cmn_Hant: 0.207
- Tatoeba-test.hun-cmn: 0.330
- Tatoeba-test.hun-zho: 0.218
- Tatoeba-test.mhr-cmn_Hans: 0.027
- Tatoeba-test.mhr-cmn_Hant: 0.007
- Tatoeba-test.multi-multi: 0.222