-
Notifications
You must be signed in to change notification settings - Fork 0
/
base45.go
313 lines (249 loc) · 8.58 KB
/
base45.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
// Package base45 implements encoding and decoding of Base45 data as
// specified by RFC 9285, see https://datatracker.ietf.org/doc/rfc9285/
package base45
import (
"bytes"
"encoding/binary"
"math"
"net/url"
)
/*
Chapter references:
[1] https://datatracker.ietf.org/doc/rfc9285/
2022-08-11 rfc9285
*/
/*
[1] Chapter 4:
A 45-character subset of US-ASCII is used; the 45 characters usable
in a QR code in Alphanumeric mode (see Section 7.3.4 and Table 2 of
ISO18004). Base45 encodes 2 bytes in 3 characters, compared to
Base64, which encodes 3 bytes in 4 characters.
[1] Chapter 4.2:
The Alphanumeric mode is defined to use 45 characters as specified in
this alphabet.
*/
// Alphabet is the Base45 lookup table. The byte stored at index i is the
// character that represents the base-45 digit i (0 through 44): the ten
// decimal digits, the upper-case letters A-Z, and finally the nine symbols
// ' ', '$', '%', '*', '+', '-', '.', '/' and ':'.
var Alphabet = []byte("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ $%*+-./:")
// encodeSingleByte converts one byte into its two-character Base45 form.
// The least significant base-45 digit comes first in the returned slice.
func encodeSingleByte(in byte) []byte {
	// [1] Chapter 4: a single byte a is written as the digit pair [c d]
	// with a = c + (45 * d). A byte is at most 255, so d never exceeds 5
	// and needs no further reduction before the table lookup.
	low, high := in%45, in/45
	return []byte{Alphabet[low], Alphabet[high]}
}
// encodeTwoBytes converts a pair of bytes into three Base45 characters.
// Only the first two bytes of in are read.
func encodeTwoBytes(in []byte) []byte {
	// [1] Chapter 4: the pair [a, b] is read as the 16 bit big-endian
	// number n = (a * 256) + b.
	rest := binary.BigEndian.Uint16(in)

	// n is then written in base 45 as [c, d, e] with
	// n = c + (d * 45) + (e * 45 * 45), least significant digit first.
	// Peeling off one digit per round produces exactly that order.
	out := make([]byte, 3)
	for i := range out {
		out[i] = Alphabet[rest%45]
		rest /= 45
	}
	return out
}
// Encode returns the Base45 representation of in.
// An empty input yields an empty (non-nil) result.
func Encode(in []byte) []byte {
	// Two output characters per input byte is a deliberate over-estimate
	// (three characters per two bytes would suffice); it spares us a
	// separate length calculation.
	encoded := make([]byte, 0, 2*len(in))

	// Walk the decoded data in chunks of up to two bytes and append the
	// Base45 form of each chunk.
	var chunk [2]byte
	src := bytes.NewReader(in)
	for src.Len() > 0 {
		// With unread bytes remaining, Read on a bytes.Reader always
		// delivers one or two of them, so its error carries no
		// information here.
		switch got, _ := src.Read(chunk[:]); got {
		case 2:
			encoded = append(encoded, encodeTwoBytes(chunk[:])...)
		case 1:
			encoded = append(encoded, encodeSingleByte(chunk[0])...)
		}
	}
	return encoded
}
// EncodeURLSafe encodes the given bytes to Base45 and percent-encodes the
// result so that it can be placed in a URL query verbatim.
// If an empty input is given, an empty result will be returned.
//
// Digits, upper-case letters, '-' and '.' are emitted literally. Every other
// Base45 character (' ', '$', '%', '*', '+', '/' and ':') is written as %XX,
// which is exactly what DecodeURLSafe reverses.
func EncodeURLSafe(in []byte) string {
	/*
		[1] Chapter 6:
		It should be noted that the resulting string after encoding to Base45
		might include non-URL-safe characters so if the URL including the
		Base45 encoded data has to be URL-safe, one has to use percent-
		encoding.
	*/
	// The text must not be rendered as the path of a url.URL: path escaping
	// leaves '+', '/', ':' and '$' untouched and url.URL.String prepends
	// "./" whenever the first path segment contains a ':'. A literal '+'
	// is read back as a space by query unescaping and the "./" prefix adds
	// two valid Base45 characters, so either one corrupts the round trip.
	escaped := url.QueryEscape(string(Encode(in)))

	// QueryEscape writes a literal plus sign as %2B, hence every '+' left
	// in its output stands for a space. Spelling those as %20 keeps the
	// result pure percent-encoding, which decodes identically under query
	// and path unescaping.
	return string(bytes.ReplaceAll([]byte(escaped), []byte("+"), []byte("%20")))
}
// decodeTwoBytes turns two Base45 characters from src into one decoded byte
// that is stored at the start of dst. It handles very short input and the
// trailing pair of an encoding whose decoded length is odd.
//
// ErrInvalidEncodedDataOverflow is returned, and dst is left untouched, when
// the pair represents a value that does not fit into a single byte.
func decodeTwoBytes(dst, src []byte) error {
	// [1] Chapter 4: a lone byte a was encoded as [c d] with
	// a = c + (45 * d); decoding looks both digits up again and recombines
	// them.
	var digits [2]int
	for i := range digits {
		digits[i] = bytes.IndexByte(Alphabet, src[i])
	}
	value := digits[0] + 45*digits[1]

	// Two base-45 digits reach up to 2024, so a crafted pair can exceed the
	// range of a byte. Such input is rejected rather than truncated.
	if value > math.MaxUint8 {
		return ErrInvalidEncodedDataOverflow
	}

	// An empty dst is tolerated and simply receives nothing.
	if len(dst) > 0 {
		dst[0] = byte(value)
	}
	return nil
}
// decodeThreeBytes turns three Base45 characters from src into two decoded
// bytes that are written big-endian to the start of dst.
//
// ErrInvalidEncodedDataOverflow is returned, and dst is left untouched, when
// the triple represents a value that does not fit into 16 bits.
func decodeThreeBytes(dst, src []byte) error {
	// [1] Chapter 4: two bytes [a, b] were encoded as the number
	// n = (a * 256) + b, written in base 45 as [c, d, e] with
	// n = c + (d * 45) + (e * 45 * 45), least significant digit first.
	// Decoding sums the digits with growing weights to recover n.
	//
	// A failed lookup (-1) is not checked here: the exported Decode
	// function validates every character against the alphabet before it
	// hands anything to this helper.
	total, weight := 0, 1
	for i := 0; i < 3; i++ {
		total += weight * bytes.IndexByte(Alphabet, src[i])
		weight *= 45
	}

	// [1] Chapter 6 asks implementations to guard against overflows in the
	// base-45 arithmetic. Three digits reach up to 91124, more than 16 bits
	// can hold, so such input is rejected rather than truncated.
	if total > math.MaxUint16 {
		return ErrInvalidEncodedDataOverflow
	}
	binary.BigEndian.PutUint16(dst, uint16(total))
	return nil
}
// Decode reads the base 45 encoded bytes and returns the decoded bytes.
//
// It fails with ErrEmptyInput for an empty input, with
// ErrInvalidEncodingCharacters if any byte lies outside Alphabet, with
// ErrInvalidLength if the input length leaves a single dangling character,
// and with whatever error the chunk decoders report for values that do not
// fit. The checks run in that order and no data is returned on failure.
func Decode(in []byte) ([]byte, error) {
	// Callers are expected to pass data; an empty call is treated as an error.
	if len(in) == 0 {
		return nil, ErrEmptyInput
	}

	// [1] Chapter 6: input that is not a valid encoding MUST be rejected,
	// in particular input containing characters outside the alphabet.
	for i := range in {
		if bytes.IndexByte(Alphabet, in[i]) < 0 {
			return nil, ErrInvalidEncodingCharacters
		}
	}

	// [1] Chapter 4: byte pairs become three characters and a final odd
	// byte becomes two, so a valid encoding has a length of 3k or 3k+2.
	// A remainder of one character cannot be produced by any input.
	if len(in)%3 == 1 {
		return nil, ErrInvalidLength
	}

	// The decoded data is always shorter than the encoded data, so a buffer
	// of the input's size is guaranteed to be large enough.
	decoded := make([]byte, len(in))
	filled := 0

	// Full triples first: three characters in, two bytes out.
	rest := in
	for len(rest) >= 3 {
		if err := decodeThreeBytes(decoded[filled:filled+2], rest[:3]); err != nil {
			return nil, err
		}
		filled += 2
		rest = rest[3:]
	}

	// After the length check only a trailing pair can remain: two
	// characters in, one byte out.
	if len(rest) == 2 {
		if err := decodeTwoBytes(decoded[filled:filled+1], rest); err != nil {
			return nil, err
		}
		filled++
	}
	return decoded[:filled], nil
}
// DecodeURLSafe reverses the URL escaping of in and decodes the resulting
// base 45 data.
//
// ErrInvalidURLSafeEscaping is returned when in contains a malformed escape
// sequence; any error from Decode (for example ErrEmptyInput for an empty
// input) is passed through unchanged. No data is returned alongside an error.
//
// The escaping is undone with query semantics, so besides %XX sequences a
// '+' in the input is read as a space. A literal Base45 '+' therefore has to
// arrive as %2B.
func DecodeURLSafe(in string) ([]byte, error) {
	// [1] Chapter 6: Base45 text may contain characters that are not URL
	// safe, so data carried inside a URL is percent-encoded and has to be
	// unescaped before it can be decoded.
	plain, err := url.QueryUnescape(in)
	if err != nil {
		return nil, ErrInvalidURLSafeEscaping
	}

	out, err := Decode([]byte(plain))
	if err != nil {
		// Never hand back data together with an error.
		out = nil
	}
	return out, err
}