-
Notifications
You must be signed in to change notification settings - Fork 1
/
normalizer_test.go
119 lines (114 loc) · 2.65 KB
/
normalizer_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
package gitserver
import (
"bytes"
"io/ioutil"
"testing"
)
// TestConvertMarkdownToUTF8 runs ConvertMarkdownToUTF8 over inputs in several
// encodings and checks that draining the returned reader produces the
// expected UTF-8 text. Every non-empty input in the table is expected to come
// back terminated by a newline; the empty input is expected to stay empty.
func TestConvertMarkdownToUTF8(t *testing.T) {
	type conversionCase struct {
		encoded []byte
		decoded string
	}
	table := []conversionCase{
		// "é" prefixed with a UTF-8 byte order mark.
		{[]byte{0xEF, 0xBB, 0xBF, 0xC3, 0xA9}, "é\n"},
		// "é" as little-endian UTF-16 with BOM.
		{[]byte{0xFF, 0xFE, 0xE9, 0x00}, "é\n"},
		// "é" as big-endian UTF-16 with BOM.
		{[]byte{0xFE, 0xFF, 0x00, 0xE9}, "é\n"},
		// "é" as little-endian UTF-32 with BOM.
		{[]byte{0xFF, 0xFE, 0x00, 0x00, 0xE9, 0x00, 0x00, 0x00}, "é\n"},
		// "é" as big-endian UTF-32 with BOM.
		{[]byte{0x00, 0x00, 0xFE, 0xFF, 0x00, 0x00, 0x00, 0xE9}, "é\n"},
		// Plain UTF-8 without any BOM, single character.
		{[]byte{0xC3, 0xA9}, "é\n"},
		// Plain UTF-8 without any BOM, a full sentence that already ends
		// in a line feed.
		{
			[]byte{
				0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x63, 0x69, 0xC3, 0xB3, 0x6E,
				0x20, 0xC3, 0xA1, 0x67, 0x75, 0x69, 0x6C, 0x61, 0x20, 0x6C, 0x6F, 0x73,
				0x20, 0x4D, 0x20, 0x69, 0x6E, 0x74, 0x65, 0x72, 0x76, 0x61, 0x6C, 0x6F,
				0x73, 0x20, 0x71, 0x75, 0x65, 0x20, 0x73, 0x65, 0x20, 0x74, 0x65, 0x20,
				0x64, 0x61, 0x72, 0xC3, 0xA1, 0x6E, 0x2E, 0x0A,
			},
			"Descripción águila los M intervalos que se te darán.\n",
		},
		// Latin-1 (ISO-8859-1): 0xE9 is "é" and is not valid UTF-8 on its own.
		{[]byte{0x50, 0x6F, 0x6B, 0xE9, 0x6D, 0x6F, 0x6E}, "Pokémon\n"},
		// Empty input.
		{[]byte{}, ""},
	}

	for _, tc := range table {
		reader, convErr := ConvertMarkdownToUTF8(bytes.NewReader(tc.encoded))
		if convErr != nil {
			t.Errorf("error converting %q to UTF-8: %q", tc, convErr)
			continue
		}

		raw, readErr := ioutil.ReadAll(reader)
		if readErr != nil {
			t.Errorf("error reading UTF-8 contents: %q", readErr)
			continue
		}

		if got := string(raw); got != tc.decoded {
			t.Errorf(
				"conversion error for case %q. Expected %q, got %q",
				tc,
				tc.decoded,
				got,
			)
		}
	}
}
// TestNormalizeCase runs NormalizeCase over a table of small inputs and
// checks that draining the returned reader produces the expected text.
//
// The table pins the following expectations: leading whitespace is kept,
// trailing whitespace is removed, CRLF / CR / LF line endings all come out as
// LF, a UTF-16 (LE) input with BOM is decoded, a missing final newline is
// added, and an empty input stays empty.
func TestNormalizeCase(t *testing.T) {
	cases := []struct {
		input  []byte
		output string
	}{
		// Leading whitespace
		{[]byte{0x20, 0x78}, " x\n"},
		// Trailing whitespace
		{[]byte{0x78, 0x20}, "x\n"},
		// CRLF
		{[]byte{0x78, 0x0D, 0x0A, 0x78}, "x\nx\n"},
		// CR
		{[]byte{0x78, 0x0D, 0x78}, "x\nx\n"},
		// LF
		{[]byte{0x78, 0x0A, 0x78}, "x\nx\n"},
		// LF followed by a whitespace-only last line
		{[]byte{0x78, 0x0A, 0x20}, "x\n\n"},
		// UTF-16 (LE) BOM
		{[]byte{0xFF, 0xFE, 0x78, 0x00}, "x\n"},
		// Missing newline
		{[]byte{0x78}, "x\n"},
		// Empty file
		{[]byte{}, ""},
	}

	for _, c := range cases {
		r, err := NormalizeCase(bytes.NewReader(c.input))
		if err != nil {
			// Include the failing case so the report identifies which
			// table entry broke.
			t.Errorf("error normalizing case %q: %q", c, err)
			continue
		}

		contents, err := ioutil.ReadAll(r)
		if err != nil {
			t.Errorf("error reading normalized contents for case %q: %q", c, err)
			continue
		}

		if got := string(contents); got != c.output {
			t.Errorf(
				"normalizer error for case %q. Expected %q, got %q",
				c,
				c.output,
				got,
			)
		}
	}
}