forked from Helsinki-NLP/Tatoeba-Challenge
-
Notifications
You must be signed in to change notification settings - Fork 0
/
opus-2021-02-18.yml
86 lines (86 loc) · 2.35 KB
/
opus-2021-02-18.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Release metadata: archive path, release date, dataset name, model type and
# the pre-processing recipe.
release: zle-fiu/opus-2021-02-18.zip
# NOTE(review): unquoted ISO date - YAML 1.1 loaders (e.g. PyYAML) resolve this
# to a date object rather than a string; confirm consumers expect that.
release-date: 2021-02-18
dataset-name: opus
modeltype: transformer
# The "(spm32k,spm32k)" pair matches the source/target values under subwords.
pre-processing: normalization + SentencePiece (spm32k,spm32k)
# Subword scheme label for each side; both sides use spm32k.
# NOTE(review): this is a sequence of single-key mappings, not one mapping, so
# consumers have to walk the list items - confirm that is the expected shape.
subwords:
- source: spm32k
- target: spm32k
# Subword model file name for each side (presumably relative to the release
# archive - confirm). Same list-of-single-key-mappings shape as subwords.
subword-models:
- source: source.spm
- target: target.spm
# Target-language label tokens of the form >>xxx<<, one for each code listed
# under target-languages (presumably prepended to the input to select the
# output language - confirm against the model documentation).
# The values are quoted because a plain scalar must not start with '>': that
# character introduces a folded block scalar, so the unquoted form is rejected
# by YAML loaders. Single quotes keep the text literal.
use-target-labels:
- '>>est<<'
- '>>fin<<'
- '>>hun<<'
- '>>izh<<'
- '>>mhr<<'
- '>>udm<<'
# Language codes accepted on the source side.
source-languages:
- bel
- rus
- ukr
# Language codes available on the target side; each one has a matching
# >>xxx<< entry under use-target-labels.
target-languages:
- est
- fin
- hun
- izh
- mhr
- udm
# Training sets per <source>-<target> pair. The number in parentheses is
# presumably the count of training examples used - confirm.
# Each value is a plain string such as "Tatoeba-train (64528)", not a number.
# NOTE(review): no pair targeting izh is listed here although izh appears in
# target-languages - confirm whether that is intended.
training-data:
- bel-est: Tatoeba-train (64528)
- bel-hun: Tatoeba-train (68222)
- bel_Latn-est: Tatoeba-train (586)
- bel_Latn-hun: Tatoeba-train (636)
- rus-est: Tatoeba-train (1000000)
- rus-fin: Tatoeba-train (1000000)
- rus-hun: Tatoeba-train (1000000)
- rus-mhr: Tatoeba-train (3922)
- rus-udm: Tatoeba-train (9504)
- ukr-est: Tatoeba-train (1000000)
- ukr-fin: Tatoeba-train (1000000)
- ukr-hun: Tatoeba-train (1000000)
# Composition of the validation (dev) set.
# The first three items are single-key mappings; the last item is a plain
# string, so consumers must handle both item types in this list.
# NOTE(review): the listed parts add up to 1000 (997 + 3) while the shuffled
# dev size is given as 993 - confirm the reason for the difference.
validation-data:
- bel-est: Tatoeba-dev, 997
- bel_Latn-est: Tatoeba-dev, 3
- total size of shuffled dev data: 993
- devset = top 993 lines of Tatoeba-dev.bel-est.src.shuffled!
# Test sets, keyed as Tatoeba-test.<source>-<target>. Each value is a string of
# two counts "a/b" (presumably sentences/words - confirm).
# NOTE(review): rus-chm uses the code chm, whereas target-languages and
# training-data use mhr - confirm these refer to the same test language.
test-data:
- Tatoeba-test.bel-est: 2/31
- Tatoeba-test.bel-hun: 3/16
- Tatoeba-test.multi-multi: 10000/59597
- Tatoeba-test.rus-chm: 2750/18927
- Tatoeba-test.rus-est: 598/3572
- Tatoeba-test.rus-fin: 3643/19307
- Tatoeba-test.rus-hun: 2500/15042
- Tatoeba-test.rus-izh: 8/33
- Tatoeba-test.rus-udm: 54/300
- Tatoeba-test.ukr-est: 16/85
- Tatoeba-test.ukr-fin: 32/186
- Tatoeba-test.ukr-hun: 464/2426
# BLEU score per test set; the keys are the same as those under test-data.
# Values are unquoted and load as floats.
BLEU-scores:
- Tatoeba-test.bel-est: 3.7
- Tatoeba-test.bel-hun: 54.8
- Tatoeba-test.multi-multi: 26.5
- Tatoeba-test.rus-chm: 0.9
- Tatoeba-test.rus-est: 54.0
- Tatoeba-test.rus-fin: 36.8
- Tatoeba-test.rus-hun: 34.8
- Tatoeba-test.rus-izh: 0.7
- Tatoeba-test.rus-udm: 2.7
- Tatoeba-test.ukr-est: 35.8
- Tatoeba-test.ukr-fin: 41.8
- Tatoeba-test.ukr-hun: 40.5
# chr-F score per test set; the keys are the same as those under test-data.
# All values here are fractions below 1. They load as floats, so a trailing
# zero (e.g. 0.610) is not preserved after parsing.
chr-F-scores:
- Tatoeba-test.bel-est: 0.437
- Tatoeba-test.bel-hun: 0.771
- Tatoeba-test.multi-multi: 0.464
- Tatoeba-test.rus-chm: 0.154
- Tatoeba-test.rus-est: 0.714
- Tatoeba-test.rus-fin: 0.610
- Tatoeba-test.rus-hun: 0.581
- Tatoeba-test.rus-izh: 0.047
- Tatoeba-test.rus-udm: 0.224
- Tatoeba-test.ukr-est: 0.687
- Tatoeba-test.ukr-fin: 0.640
- Tatoeba-test.ukr-hun: 0.647