utf8与unicode互转(不依赖第三方库)
# unicode 转utf8
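out:输出缓冲区,用于存放编码后的 UTF-8 字符串(以 '\0' 结尾);调用者需保证其容量不少于 4 * int_len + 1 字节
unicode[]:输入的 Unicode 码点数组
int_len:输入的 Unicode 码点数组的长度(元素个数)
返回值:编码成 UTF-8 后的字节长度(不含结尾的 '\0');遇到非法码点(如大于 0x10FFFF)时返回 0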
// Encodes an array of Unicode code points as a NUL-terminated UTF-8 string.
//
// out      Destination buffer. Each code point produces at most 4 bytes and a
//          terminating 0 is appended, so the caller must provide at least
//          4 * int_len + 1 bytes.
// unicode  Input code points.
// int_len  Number of elements in `unicode`.
//
// Returns the number of bytes written, not counting the terminating 0.
// Returns 0 (and leaves `out` as an empty string) if any element is not a
// Unicode scalar value, i.e. is above 0x10FFFF or is a UTF-16 surrogate
// (0xD800..0xDFFF); such values have no well-formed UTF-8 encoding.
// Note that an empty input also returns 0.
Uint64 UnicodeToUtf8(char* out, Uint32 unicode[], Uint32 int_len)
{
    Uint64 char_len = 0;
    // Unsigned index: int_len is unsigned, so a signed counter would be
    // compared after conversion and could overflow for very large inputs.
    for (Uint32 i = 0; i < int_len; i++)
    {
        const Uint32 cp = unicode[i];

        if (cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF)) {
            // error: not encodable; do not hand back a half-written,
            // unterminated buffer.
            out[0] = 0;
            return 0;
        }

        if (cp <= 0x7F) {
            // Plain ASCII: 0xxxxxxx
            out[char_len++] = (char)cp;
        }
        else if (cp <= 0x07FF) {
            // 2 bytes: 110xxxxx 10xxxxxx
            out[char_len++] = (char)(((cp >> 6) & 0x1F) | 0xC0);
            out[char_len++] = (char)((cp & 0x3F) | 0x80);
        }
        else if (cp <= 0xFFFF) {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            out[char_len++] = (char)(((cp >> 12) & 0x0F) | 0xE0);
            out[char_len++] = (char)(((cp >> 6) & 0x3F) | 0x80);
            out[char_len++] = (char)((cp & 0x3F) | 0x80);
        }
        else {
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            out[char_len++] = (char)(((cp >> 18) & 0x07) | 0xF0);
            out[char_len++] = (char)(((cp >> 12) & 0x3F) | 0x80);
            out[char_len++] = (char)(((cp >> 6) & 0x3F) | 0x80);
            out[char_len++] = (char)((cp & 0x3F) | 0x80);
        }
    }
    out[char_len] = 0;
    return char_len;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# utf8转unicode
// Decodes a NUL-terminated UTF-8 string into an array of Unicode code points.
//
// name  Input UTF-8 string. A null pointer is treated as an empty string.
//
// Returns a Doublet holding (pointer to a new[]-allocated Uint32 array,
// number of code points stored in it). The array is allocated here with
// new[] and is not freed by this function.
//
// 1- to 4-byte sequences are decoded. Any malformed input (stray continuation
// byte, invalid lead byte, truncated sequence, overlong encoding, surrogate,
// or value above 0x10FFFF) produces one U+FFFD replacement character and the
// decoder resumes at the next byte, so the loop always makes progress.
PAL_DS::Doublet<Uint32*, Uint32> Utf8ToUnicode(const char * name)
{
    const Uint32 kReplacement = 0xFFFD;

    // Measure the input once instead of on every loop iteration.
    const Uint32 byte_len = name ? static_cast<Uint32>(strLen(name)) : 0;

    // Every iteration consumes at least one byte and emits exactly one code
    // point, so byte_len elements are always enough.
    Uint32* unicode = new Uint32[byte_len];
    Uint32 unicode_len = 0;
    Uint32 read_pos = 0;

    while (read_pos < byte_len)
    {
        const unsigned char lead = static_cast<unsigned char>(name[read_pos]);
        Uint32 code_point = 0;
        Uint32 trail_count = 0;  // number of continuation bytes expected
        Uint32 min_value = 0;    // smallest value allowed for this length
        bool valid = true;

        if (lead < 0x80) {
            // 0xxxxxxx
            code_point = lead;
        }
        else if ((lead & 0xE0) == 0xC0) {
            // 110xxxxx 10xxxxxx
            code_point = lead & 0x1F;
            trail_count = 1;
            min_value = 0x80;
        }
        else if ((lead & 0xF0) == 0xE0) {
            // 1110xxxx 10xxxxxx 10xxxxxx
            code_point = lead & 0x0F;
            trail_count = 2;
            min_value = 0x800;
        }
        else if ((lead & 0xF8) == 0xF0) {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            code_point = lead & 0x07;
            trail_count = 3;
            min_value = 0x10000;
        }
        else {
            // Stray continuation byte (10xxxxxx) or invalid lead (11111xxx).
            valid = false;
        }

        // Sequence must not run past the end of the string.
        if (valid && trail_count > byte_len - read_pos - 1) {
            valid = false;
        }

        for (Uint32 k = 1; valid && k <= trail_count; k++)
        {
            const unsigned char trail =
                static_cast<unsigned char>(name[read_pos + k]);
            if ((trail & 0xC0) != 0x80) {
                valid = false;
            }
            else {
                code_point = (code_point << 6) | (trail & 0x3F);
            }
        }

        // Reject overlong forms, surrogates and out-of-range values.
        if (valid && (code_point < min_value
                      || code_point > 0x10FFFF
                      || (code_point >= 0xD800 && code_point <= 0xDFFF))) {
            valid = false;
        }

        if (valid) {
            unicode[unicode_len++] = code_point;
            read_pos += trail_count + 1;
        }
        else {
            unicode[unicode_len++] = kReplacement;
            read_pos += 1;
        }
    }
    return PAL_DS::Doublet<Uint32*, Uint32>(unicode, unicode_len);
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
编辑 (opens new window)
上次更新: 2024/12/04, 16:28:16