-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathirsencode.h
More file actions
134 lines (115 loc) · 4.36 KB
/
irsencode.h
File metadata and controls
134 lines (115 loc) · 4.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
// @brief Functions for converting strings between various encodings
//
// Created: 12.04.2021
// Implementation: Galimzyanov
// Some fixes: Krasheninnikov
#ifndef ENCODE_H
#define ENCODE_H
#include <irsdefs.h>
#include <irscpp.h>
#include <irsnetdefs.h>
#include <irserror.h>
#if IRS_USE_UTF8_CPP
#include <utf8.h>
#endif // IRS_USE_UTF8_CPP
namespace irs
{
/**
 * @brief Converts a CP1251 (Windows-1251) byte range to UTF-8.
 *
 * Bytes below 0x80 are copied unchanged. Bytes 0x80..0xFF are mapped to their
 * Unicode code point and written as a 2- or 3-byte UTF-8 sequence. Byte 0x98,
 * which has no character assigned in CP1251, is dropped from the output.
 * No terminating NUL is written.
 *
 * @param start: pointer to the first input byte.
 * @param end: pointer one past the last input byte.
 * @param result: pointer to the output buffer. The function takes no buffer
 *   size and performs no bounds checking, so the caller must provide room for
 *   the worst case of 3 * (end - start) bytes.
 *
 * @return size_t: number of bytes written to result.
 */
inline size_t cp1251_to_utf8(const char* start, const char* end, char* result)
{
  // Unicode code points for CP1251 bytes 0x80..0xBF. A code-point table is
  // used instead of a space-padded byte string so that the mapping cannot be
  // silently misaligned by whitespace changes. 0 marks the unassigned 0x98.
  static const unsigned short irregular_part[64] = {
    /* 0x80 */ 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021,
    /* 0x88 */ 0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F,
    /* 0x90 */ 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    /* 0x98 */ 0x0000, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F,
    /* 0xA0 */ 0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7,
    /* 0xA8 */ 0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407,
    /* 0xB0 */ 0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7,
    /* 0xB8 */ 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457
  };

  char* out = result;
  for (const char* in = start; in < end; ++in) {
    // Work on the unsigned value: plain char may be signed.
    const unsigned byte = (unsigned char)*in;

    if (byte < 0x80u) {
      // ASCII is identical in CP1251 and UTF-8.
      *out++ = *in;
      continue;
    }

    // 0xC0..0xFF map contiguously onto U+0410..U+044F.
    const unsigned code_point = (byte >= 0xC0u)
      ? 0x0410u + (byte - 0xC0u)
      : irregular_part[byte - 0x80u];

    if (code_point == 0u) {
      // Unassigned CP1251 byte: nothing to emit.
      continue;
    }

    if (code_point < 0x800u) {
      *out++ = (char)(0xC0u | (code_point >> 6));
      *out++ = (char)(0x80u | (code_point & 0x3Fu));
    } else {
      *out++ = (char)(0xE0u | (code_point >> 12));
      *out++ = (char)(0x80u | ((code_point >> 6) & 0x3Fu));
      *out++ = (char)(0x80u | (code_point & 0x3Fu));
    }
  }
  return (size_t)(out - result);
}
#if IRS_USE_UTF8_CPP
/**
 * @brief Converts a wide-character string (UTF-16 or UTF-32) to UTF-8.
 *
 * The source encoding is chosen from sizeof(wchar_t): 2 selects UTF-16,
 * 4 selects UTF-32. Any other size is rejected through IRS_STATIC_ASSERT.
 * Conversion is delegated to the utf8::unchecked functions.
 *
 * @param start: pointer to the beginning of the message.
 * @param end: pointer to the end of the message.
 * @param result: pointer to the output data. No buffer size is passed, so
 *   the caller is responsible for providing enough room.
 *
 * @return size_t: size of the re-encoded message, i.e. the distance from
 *   result to the end position returned by the converter.
 */
inline size_t wsymbols_to_utf8(const wchar_t* start, const wchar_t* end,
  char* result)
{
  const size_t wide_unit_size = sizeof(wchar_t);
  char* utf8_tail = NULL;

  if (wide_unit_size == 4) {
    // 4-byte wchar_t: input is treated as UTF-32.
    utf8_tail = utf8::unchecked::utf32to8(start, end, result);
  } else if (wide_unit_size == 2) {
    // 2-byte wchar_t: input is treated as UTF-16.
    utf8_tail = utf8::unchecked::utf16to8(start, end, result);
  } else {
    // Unsupported wchar_t width.
    IRS_STATIC_ASSERT((sizeof(wchar_t) == 2) || (sizeof(wchar_t) == 4));
  }

  return utf8_tail - result;
}
/**
 * @brief Overload for re-encoding narrow-character data from CP1251 to UTF-8.
 *
 * Forwards all arguments to cp1251_to_utf8.
 *
 * @param start: pointer to the beginning of the message.
 * @param end: pointer to the end of the message.
 * @param result: pointer to the output data.
 *
 * @return size_t: size of the re-encoded message, as returned by
 *   cp1251_to_utf8.
 */
inline size_t lwipbuf_to_utf8(const char* start, const char* end,
  char* result)
{
  const size_t utf8_size = cp1251_to_utf8(start, end, result);
  return utf8_size;
}
/**
 * @brief Overload for re-encoding wide-character data from UTF-16 (UTF-32)
 * to UTF-8.
 *
 * Forwards all arguments to wsymbols_to_utf8.
 *
 * @param start: pointer to the beginning of the message.
 * @param end: pointer to the end of the message.
 * @param result: pointer to the output data.
 *
 * @return size_t: size of the re-encoded message, as returned by
 *   wsymbols_to_utf8.
 */
inline size_t lwipbuf_to_utf8(const wchar_t* start, const wchar_t* end,
  char* result)
{
  const size_t utf8_size = wsymbols_to_utf8(start, end, result);
  return utf8_size;
}
#endif // IRS_USE_UTF8_CPP
} // namespace irs
#endif // ENCODE_H