-
Notifications
You must be signed in to change notification settings - Fork 2
/
utf8.c
98 lines (82 loc) · 2.44 KB
/
utf8.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
/*
Cuckoo Sandbox - Automated Malware Analysis
Copyright (C) 2010-2014 Cuckoo Sandbox Developers
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <windows.h>
#include "utf8.h"
/*
 * Encode a single 16-bit value (one UTF-16 code unit / BMP code point)
 * as UTF-8.
 *
 * c:   value to encode.
 * out: destination buffer; up to 3 bytes are written, no NUL terminator.
 *
 * Returns the number of bytes written: 1 for c < 0x80, 2 for c < 0x800,
 * 3 otherwise.
 *
 * Surrogate halves (0xD800-0xDFFF) are not paired up; each one is encoded
 * on its own as a 3-byte sequence.
 */
int utf8_encode(unsigned short c, unsigned char *out)
{
    if(c < 0x80) {
        /* 0xxxxxxx */
        out[0] = (unsigned char) c;
        return 1;
    }
    if(c < 0x800) {
        /* 110xxxxx 10xxxxxx: top 5 bits, then low 6 bits */
        out[0] = (unsigned char) (0xc0 | (c >> 6));
        out[1] = (unsigned char) (0x80 | (c & 0x3f));
        return 2;
    }
    /* 1110xxxx 10xxxxxx 10xxxxxx: top 4 bits, middle 6 bits, low 6 bits */
    out[0] = (unsigned char) (0xe0 | ((c >> 12) & 0x0f));
    out[1] = (unsigned char) (0x80 | ((c >> 6) & 0x3f));
    out[2] = (unsigned char) (0x80 | (c & 0x3f));
    return 3;
}
/*
 * Number of UTF-8 bytes needed for the 16-bit value x:
 * 1 below 0x80, 2 below 0x800, 3 for everything else.
 * These are the same thresholds utf8_encode() uses for its return value.
 */
int utf8_length(unsigned short x)
{
    if (x < 0x80) {
        return 1;
    }
    if (x < 0x800) {
        return 2;
    }
    return 3;
}
/*
 * Sum of utf8_length() over the chars of a byte string.
 *
 * s:   input string.
 * len: number of chars to examine; any negative value means "use strlen(s)".
 *
 * Returns the total byte count reported by utf8_length().
 *
 * NOTE: each char is handed to utf8_length() unchanged, so where plain
 * char is signed a byte >= 0x80 converts to 0xFF80-0xFFFF and is counted
 * as 3 bytes.
 */
int utf8_strlen_ascii(const char *s, int len)
{
    int total = 0;
    int i;
    int count = (len < 0) ? (int) strlen(s) : len;

    for (i = 0; i < count; i++) {
        total += utf8_length(s[i]);
    }
    return total;
}
/*
 * Sum of utf8_length() over the elements of a wide string.
 *
 * s:   input wide string.
 * len: number of wchar_t elements to examine; any negative value means
 *      "use lstrlenW(s)".
 *
 * Returns the total byte count reported by utf8_length().
 */
int utf8_strlen_unicode(const wchar_t *s, int len)
{
    int total = 0;
    int i;
    int count = (len < 0) ? lstrlenW(s) : len;

    for (i = 0; i < count; i++) {
        total += utf8_length(s[i]);
    }
    return total;
}
/*
 * Encode a byte string as UTF-8 into a newly allocated, length-prefixed
 * buffer.
 *
 * str:    input bytes; each byte is treated as one code point 0x00-0xFF.
 * length: number of bytes to encode; any negative value means
 *         "use strlen(str)".
 *
 * Returns a malloc()ed buffer laid out as:
 *   bytes 0..3  - the encoded byte count, stored as an int
 *   bytes 4..   - the encoded bytes (not NUL-terminated)
 * or NULL if the allocation fails. The caller owns the buffer and must
 * free() it.
 */
char * utf8_string(const char *str, int length)
{
    /* Read the input as unsigned bytes so that values >= 0x80 are not
     * sign-extended into 0xFF80-0xFFFF where plain char is signed. */
    const unsigned char *bytes = (const unsigned char *) str;
    int encoded_length = 0;
    int pos = 4;
    int i;
    char *utf8string;

    if (length < 0) {
        length = (int) strlen(str);
    }

    /* Counted here rather than through utf8_strlen_ascii() so that the
     * size and the encoding loop below see exactly the same values. */
    for (i = 0; i < length; i++) {
        encoded_length += utf8_length(bytes[i]);
    }

    utf8string = malloc(encoded_length + 4);
    if (utf8string == NULL) {
        return NULL;
    }

    /* malloc() storage is suitably aligned for an int store. */
    *((int *) utf8string) = encoded_length;

    for (i = 0; i < length; i++) {
        pos += utf8_encode(bytes[i], (unsigned char *) &utf8string[pos]);
    }
    return utf8string;
}
/*
 * Encode a wide string as UTF-8 into a newly allocated, length-prefixed
 * buffer.
 *
 * str:    input wide string; each element is encoded independently by
 *         utf8_encode().
 * length: number of wchar_t elements to encode; any negative value means
 *         "use lstrlenW(str)".
 *
 * Returns a malloc()ed buffer laid out as:
 *   bytes 0..3  - the encoded byte count, stored as an int
 *   bytes 4..   - the encoded bytes (not NUL-terminated)
 * or NULL if the allocation fails. The caller owns the buffer and must
 * free() it.
 */
char * utf8_wstring(const wchar_t *str, int length)
{
    int encoded_length;
    int pos = 4;
    int i;
    char *utf8string;

    /* Normalise every negative length, matching utf8_strlen_unicode();
     * otherwise the encode loop below would not terminate correctly. */
    if (length < 0) {
        length = lstrlenW(str);
    }

    encoded_length = utf8_strlen_unicode(str, length);

    utf8string = malloc(encoded_length + 4);
    if (utf8string == NULL) {
        return NULL;
    }

    /* malloc() storage is suitably aligned for an int store. */
    *((int *) utf8string) = encoded_length;

    for (i = 0; i < length; i++) {
        pos += utf8_encode(str[i], (unsigned char *) &utf8string[pos]);
    }
    return utf8string;
}