123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471 |
- #include "str_conv.h"
- #include "global_func.h"
- #include <codecvt>
- #include "log.h"
- //-------------------------------------------------------------------------------------------------
// Decodes a UTF-8 byte string into a wide string, one wchar_t per decoded value.
//
// The classic 1- to 6-byte UTF-8 forms are accepted. A byte that cannot start a
// sequence, a sequence cut short by the end of the input, or a sequence whose
// trailing bytes are not 10xxxxxx continuation bytes produces U+FFFD and
// consumes exactly one byte, so the loop always advances and never reads
// outside `s`. Decoded values wider than wchar_t are truncated by the final
// conversion.
std::wstring UTF8toUnicode(const std::string& s) {
    const wchar_t kReplacement = static_cast<wchar_t>(0xFFFD);
    const size_t size = s.length();

    std::wstring ws;
    ws.reserve(size);

    size_t i = 0;
    while (i < size) {
        const unsigned char lead = static_cast<unsigned char>(s[i]);

        // Sequence length and payload bits carried by the lead byte.
        size_t len = 0;
        unsigned long value = 0;
        if ((lead & 0x80) == 0) {
            len = 1;
            value = lead;
        } else if ((lead & 0xE0) == 0xC0) {
            len = 2;
            value = lead & 0x1F;
        } else if ((lead & 0xF0) == 0xE0) {
            len = 3;
            value = lead & 0x0F;
        } else if ((lead & 0xF8) == 0xF0) {
            len = 4;
            value = lead & 0x07;
        } else if ((lead & 0xFC) == 0xF8) {
            len = 5;
            value = lead & 0x03;
        } else if ((lead & 0xFE) == 0xFC) {
            len = 6;
            value = lead & 0x01;
        }

        // len == 0 means the byte is not a valid lead byte; otherwise the whole
        // sequence has to fit in what is left of the input.
        bool ok = (len != 0) && (len <= size - i);
        for (size_t k = 1; ok && k < len; ++k) {
            const unsigned char cont = static_cast<unsigned char>(s[i + k]);
            if ((cont & 0xC0) != 0x80) {
                ok = false;
            } else {
                value = (value << 6) | (cont & 0x3F);
            }
        }

        if (!ok) {
            ws += kReplacement;
            ++i;
            continue;
        }
        ws += static_cast<wchar_t>(value);
        i += len;
    }
    return ws;
}
- //-------------------------------------------------------------------------------------------------
//002690C8
// Encodes the NUL-terminated wide string pWBuf1 as UTF-8 into pOutput0.
//
// dOutputLen2 is the capacity of pOutput0 in bytes. Each wide character is
// written as a 1-, 2- or 3-byte sequence (every value of 0x800 and above uses
// the 3-byte form). A character is only emitted while the remaining capacity is
// greater than its encoded length plus one. The output is always
// NUL-terminated.
//
// Returns true when conversion stopped because the output ran out of room,
// false otherwise.
bool UnicodeToUTF8(char* pOutput0, const wchar_t* pWBuf1, uint dOutputLen2) {
    bool truncated = false;
    while (dOutputLen2 >= 2) {
        const wchar_t ch = *pWBuf1;
        truncated = false;
        if (ch == 0) {
            break;
        }

        char encoded[4];
        int count;
        if (ch <= 0x7F) {
            encoded[0] = ch;
            count = 1;
        } else if ((ch >> 11) == 0) {
            encoded[0] = ((ch >> 6) & 0x1F) | 0xC0;
            encoded[1] = (ch & 0x3F) | 0x80;
            count = 2;
        } else {
            encoded[0] = (0xe0 | (ch >> 12));
            encoded[1] = (0x80 | ((ch >> 6) & 0x3f));
            encoded[2] = (0x80 | (ch & 0x3f));
            count = 3;
        }

        // The "+ 1" keeps room for the trailing NUL.
        if (!(dOutputLen2 > (count + 1))) {
            truncated = true;
            break;
        }

        ++pWBuf1;
        for (int k = 0; k < count; ++k) {
            pOutput0[k] = encoded[k];
        }
        dOutputLen2 -= count;
        pOutput0 += count;
    }
    *pOutput0 = '\0';
    return truncated;
}
- //-------------------------------------------------------------------------------------------------
- //00268EDC 此函数返汇编后与原始汇编有较大差异,但是经过单元测试是正确的
- int UTF8ToUnicode(wchar_t* pWOut0, const char* pUtfStr1, uint dOutputLen2) {
- int dret = 0;
- for (; dOutputLen2 >= 2; dOutputLen2--) {
- uint cr4 = *pUtfStr1; //00268FB0
- int dr3 = 0;
- int dr5 = (int((int)cr4 << 24)) >> 24;
- if (dr5) {
- dr3 = 2;
- if ((cr4 & 0xC0) != 0x80) {
- // loc_268EE8
- if ((cr4 - 0xF5) < 0xB || (cr4 & 0xFE) == 0xC0) {
- // loc_268FC6
- break;
- }
- const char* pr3 = pUtfStr1 + 1; //00268EF8
- //00268EF8
- if (dr5 > -1) {
- pUtfStr1++;
- } else {
- // loc_268F04
- uint dr6_268F04 = dr5 + 0x3E;
- dr6_268F04 = UXTB_00(dr6_268F04);
- if (dr6_268F04 <= 0x1D) {
- //00268F0E
- dret = 2;
- char cr5 = *pr3; //00268F0E
- if (cr5 == 0 || (cr5 & 0xC0) != 0x80) {
- break;
- }
- //00268F1E
- // dr3 = (cr4 & 0x1F);
- // int dr6 = cr5 & 0x3F;
- cr4 = (cr5 & 0x3F) | (cr4 & 0x1F);
- pUtfStr1 += 2;
- } else {
- // loc_268F2E
- if ((dr5 & 0xFFFFFFF0) == -32) {
- //00268F36
- char cr5 = *pr3; //00268F36
- if (!cr5 || !pUtfStr1[2]) {
- dret = 2;
- break;
- }
- //00268F42
- dret = 2;
- if ((cr5 & 0xC0) == 0x80 && (pUtfStr1[2] & 0xC0) == 0x80) {
- //00268F54
- int ddr3 = UXTH_00((cr4 << 0xC));
- int dr7 = pUtfStr1[2] & 0x3F;
- pUtfStr1 += 3;
- ddr3 = ddr3 | ((cr5 & 0x3F) << 6);
- cr4 = ddr3 | dr7;
- } else {
- break;
- }
- } else {
- // loc_268F64
- int ddr4 = UXTB_00((dr5 + 0x10));
- if (ddr4 > 4) {
- dret = 2;
- break;
- }
- //00268F6E
- if (pUtfStr1[1] == 0 || pUtfStr1[2] == 0 || pUtfStr1[3] == 0) {
- dret = 2;
- break;
- }
- //00268F7A
- if ((pUtfStr1[1] & 0xC0) == 0x80 && (pUtfStr1[2] & 0xC0) == 0x80 && (pUtfStr1[3] & 0xC0) == 0x80) {
- //00268F94
- int dddr3 = 0xFC0 & (pUtfStr1[2] << 6); //00268F94
- int dgr7 = pUtfStr1[3] & 0x3F;
- dddr3 = dddr3 | ((pUtfStr1[1] & 0x3F) << 12);
- pUtfStr1 += 4;
- cr4 = dddr3 | dgr7;
- } else {
- break;
- }
- }
- // loc_268FA2
- }
- // loc_268FA6
- }
- // loc_268FA6
- *pWOut0++ = cr4;
- } else {
- break;
- }
- } else {
- break;
- }
- }
- // loc_268FC6
- *pWOut0 = 0;
- return dret;
- }
- //-------------------------------------------------------------------------------------------------
//00268E6E
// The original program had a bug here: its loop bound was inclusive, so up to
// dLen2 + 1 characters plus a terminator could be written.
//
// Narrows the NUL-terminated wide string pWBuf1 into pOut0. Characters with a
// value of 0x100 or above are replaced by c3; everything else is copied as a
// single byte.
//
// dLen2 is treated as the capacity of pOut0 in bytes, like the length argument
// of the sibling converters in this file: at most dLen2 - 1 characters are
// written, followed by a terminating NUL. Nothing is written when dLen2 is 0.
void UnicodeToAscii(char* pOut0, wchar_t const* pWBuf1, uint dLen2, char c3) {
    if (dLen2 == 0) {
        return;
    }
    uint i = 0;
    // "i + 1 < dLen2" reserves the last slot for the terminator.
    for (; i + 1 < dLen2 && pWBuf1[i] != 0; ++i) {
        pOut0[i] = (pWBuf1[i] >= 0x100) ? c3 : static_cast<char>(pWBuf1[i]);
    }
    pOut0[i] = '\0';
}
- //-------------------------------------------------------------------------------------------------
- //00268E9E
- char* UTF8ToAscii(char* pOutAscii0, char const* pUtf81, uint dLen2, char c3) {
- wchar_t* pWBuf = new wchar_t[dLen2];
- UTF8ToUnicode(pWBuf, pUtf81, dLen2); //00268EC0
- UnicodeToAscii(pOutAscii0, pWBuf, dLen2, c3);
- delete[] pWBuf;
- return pOutAscii0;
- }
- //-------------------------------------------------------------------------------------------------
- //00268E4C //^_- 经过单元测试,功能正常
- //循环取p_c_r1的元素:我的代码是p_c_r1++,原始代码是通过d_r12去取
- void AsciiToUnicode(wchar_t* p_wc_r0, char const* p_c_r1, uint ud_r2) {
- LOGI("AsciiToUnicode entry");
- int d_r12;
- for (d_r12 = 0; (ud_r2 >= 2) && (p_c_r1[d_r12]); ud_r2--, d_r12++) {
- p_wc_r0[d_r12] = p_c_r1[d_r12];
- }
- p_wc_r0[d_r12] = 0;
- LOGI("AsciiToUnicode end");
- }
- //-------------------------------------------------------------------------------------------------
// Converts a UTF-8 encoded std::string to a UTF-16 std::u16string using the
// standard codecvt_utf8_utf16 facet. Conversion failures are reported by
// std::wstring_convert (it throws std::range_error).
std::u16string s2ws(const std::string& str) {
    using Utf8ToUtf16 = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>;
    return Utf8ToUtf16().from_bytes(str);
}
- //-------------------------------------------------------------------------------------------------
// Converts a UTF-16 std::u16string to a UTF-8 encoded std::string.
//
// Uses the codecvt_utf8_utf16 facet, the same one s2ws uses, so surrogate pairs
// are encoded as 4-byte UTF-8 sequences and strings produced by s2ws convert
// back. The UCS-2 facet codecvt_utf8<char16_t> used previously has no
// representation for surrogate pairs. Conversion failures are reported by
// std::wstring_convert (it throws std::range_error).
std::string ws2s(const std::u16string& wstr) {
    using Utf16ToUtf8 = std::codecvt_utf8_utf16<char16_t>;
    std::wstring_convert<Utf16ToUtf8, char16_t> converter;
    return converter.to_bytes(wstr);
}
- //-------------------------------------------------------------------------------------------------
// Returns the number of char16_t units before the terminating 0 in strarg,
// or -1 when strarg is null.
int strlen16(const char16_t* strarg) {
    if (strarg == nullptr) {
        return -1;
    }
    int count = 0;
    while (strarg[count] != 0) {
        ++count;
    }
    return count;
}
- //-------------------------------------------------------------------------------------------------
// Returns a copy of str without leading and trailing space characters (' ').
// Only the space character is stripped; tabs and newlines are kept.
// An empty input or one made only of spaces yields an empty string.
std::string trim(const std::string& str) {
    const size_t first = str.find_first_not_of(' ');
    if (first == std::string::npos) {
        // Nothing but spaces (or nothing at all): the trimmed result is empty.
        return std::string();
    }
    const size_t last = str.find_last_not_of(' ');
    return str.substr(first, last - first + 1);
}
- //-------------------------------------------------------------------------------------------------
- vector<string> string_split(string s, char delim) {
- vector<string> result;
- stringstream ss(s);
- string item;
- while (getline(ss, item, delim)) {
- item = trim(item);
- result.push_back(item);
- }
- return result;
- }
- //-------------------------------------------------------------------------------------------------
// Replaces every occurrence of toSearch in data with replaceStr, in place.
//
// The search resumes after the inserted text, so occurrences of toSearch inside
// replaceStr are not replaced again. An empty toSearch matches at every
// position and would never terminate, so it leaves data unchanged.
void string_replace(std::string& data, std::string toSearch, std::string replaceStr) {
    if (toSearch.empty()) {
        return;
    }
    for (size_t pos = data.find(toSearch); pos != std::string::npos;
         pos = data.find(toSearch, pos + replaceStr.size())) {
        data.replace(pos, toSearch.size(), replaceStr);
    }
}
- //-------------------------------------------------------------------------------------------------
// Replaces every occurrence of toSearch in data with replaceStr, in place.
//
// The search resumes after the inserted text, so occurrences of toSearch inside
// replaceStr are not replaced again. An empty toSearch matches at every
// position and would never terminate, so it leaves data unchanged.
void u16string_replace(std::u16string& data, std::u16string toSearch, std::u16string replaceStr) {
    if (toSearch.empty()) {
        return;
    }
    for (size_t pos = data.find(toSearch); pos != std::u16string::npos;
         pos = data.find(toSearch, pos + replaceStr.size())) {
        data.replace(pos, toSearch.size(), replaceStr);
    }
}
- //-------------------------------------------------------------------------------------------------
//002691E4
// Returns a newly allocated copy of the NUL-terminated wide string pStr,
// including its terminator. The caller owns the result and releases it with
// delete[]. A null pStr yields nullptr.
//
// The length is measured in wchar_t units directly, so the result does not
// depend on sizeof(wchar_t), and exactly length + 1 elements are copied so the
// source is never read past its terminator.
wchar_t* CreateStringCopy(const wchar_t* pStr) {
    if (pStr == nullptr) {
        return nullptr;
    }
    size_t len = 0;
    while (pStr[len] != 0) {
        ++len;
    }
    wchar_t* pBuf = new wchar_t[len + 1];
    memcpy(pBuf, pStr, (len + 1) * sizeof(wchar_t));
    return pBuf;
}
- //-------------------------------------------------------------------------------------------------
//00269218 ^_^ unit-tested, works as expected
// Returns a newly allocated copy of the NUL-terminated string pStr, including
// its terminator. The caller owns the result and releases it with delete[].
char* CreateStringCopy(const char* pStr) {
    // Characters plus the terminating NUL.
    const size_t bytes = strlen(pStr) + sizeof(char);
    char* copy = new char[bytes];
    memcpy(copy, pStr, bytes);
    return copy;
}
- //-------------------------------------------------------------------------------------------------------
//00268C2C ^_^
// Returns the number of wide characters in a1 before the terminating 0.
size_t xstrlen(const wchar_t* a1) {
    const wchar_t* end = a1;
    while (*end != 0) {
        ++end;
    }
    return end - a1;
}
- //-------------------------------------------------------------------------------------------------------
// Returns a pointer to the first occurrence of c in the NUL-terminated wide
// string pbuf, or nullptr when c does not occur. The terminator itself is never
// matched, so searching for 0 returns nullptr.
wchar_t* xstrchr(const wchar_t* pbuf, wchar_t c) {
    for (const wchar_t* cur = pbuf; *cur != 0; ++cur) {
        if (*cur == c) {
            return const_cast<wchar_t*>(cur);
        }
    }
    return nullptr;
}
- //-------------------------------------------------------------------------------------------------------
// Returns a pointer to the last occurrence of c in the NUL-terminated wide
// string pbuf, or nullptr when c does not occur. The terminator itself is never
// matched, so searching for 0 returns nullptr (same as xstrchr).
wchar_t* xstrrchr(const wchar_t* pbuf, wchar_t c) {
    const wchar_t* last = nullptr;
    for (const wchar_t* cur = pbuf; *cur != 0; ++cur) {
        if (*cur == c) {
            // Keep scanning: a later match replaces this one.
            last = cur;
        }
    }
    return const_cast<wchar_t*>(last);
}
- //-------------------------------------------------------------------------------------------------------
//0014D150
// Converts the letters 'A'..'Z' in the NUL-terminated string pBuf to lower case
// in place; every other byte is left unchanged. Returns pBuf.
char* strlwr(char* pBuf) {
    for (char* p = pBuf; *p != 0; ++p) {
        if (*p >= 'A' && *p <= 'Z') {
            *p = *p + ('a' - 'A');
        }
    }
    return pBuf;
}
- //-------------------------------------------------------------------------------------------------------
- string Unicode2String(char16_t const* pWBuf) {
- std::u16string u16str(pWBuf);
- return ws2s(u16str);
- }
- //-------------------------------------------------------------------------------------------------------
//00268CD2 ^_^ unit-tested, works as expected
// Compares two NUL-terminated wide strings character by character.
// Returns -1 when a1 orders before a2, 1 when it orders after, and 0 when both
// strings are equal.
int xstrcmp(const wchar_t* a1, const wchar_t* a2) {
    for (;; ++a1, ++a2) {
        const wchar_t rhs = *a2;
        const wchar_t lhs = *a1;
        if (rhs == 0) {
            // a2 is exhausted: equal if a1 ended too, otherwise a1 is longer.
            return lhs != 0 ? 1 : 0;
        }
        if (lhs < rhs) {
            return -1;
        }
        if (lhs > rhs) {
            return 1;
        }
    }
}
- //-------------------------------------------------------------------------------------------------------
//00268C3C
// Copies the NUL-terminated wide string a2 into a1, writing at most a3 - 1
// characters followed by a terminating 0. When a3 is 0 nothing is written.
//
// Returns the length of a2 in characters, so a result of a3 or more means the
// copy was truncated. The length is computed by pointer subtraction, which
// counts wchar_t elements whatever sizeof(wchar_t) is.
ulong xstrlcpy(wchar_t* a1, const wchar_t* a2, unsigned int a3) {
    const wchar_t* src = a2;
    if (a3 != 0) {
        for (unsigned int room = a3 - 1; room != 0; --room) {
            const wchar_t ch = *src++;
            *a1++ = ch;
            if (ch == 0) {
                // src is one past the terminator here.
                return static_cast<ulong>(src - a2) - 1;
            }
        }
        // Out of room: terminate what has been copied so far.
        *a1 = 0;
    }
    // Walk the remainder of the source to report its full length.
    while (*src++ != 0) {
    }
    return static_cast<ulong>(src - a2) - 1;
}
- //-------------------------------------------------------------------------------------------------------
//001199C2 -_- unit-tested, works as expected
// Copies the NUL-terminated byte string a2 into a1, writing at most a3 - 1
// bytes followed by a terminating 0. When a3 is 0 nothing is written.
// Returns the length of a2, so a result of a3 or more means the copy was
// truncated.
size_t strlcpy(unsigned char* a1, unsigned char* a2, int a3) {
    unsigned char* src = a2;
    if (a3 != 0) {
        for (int room = a3 - 1; room != 0; --room) {
            const unsigned char ch = *src++;
            *a1++ = ch;
            if (ch == 0) {
                return src - a2 - 1;
            }
        }
        *a1 = 0;
    }
    while (*src++ != 0) {
    }
    return src - a2 - 1;
}
- //-------------------------------------------------------------------------------------------------------
// Returns a pointer to the first occurrence of the NUL-terminated wide string
// a2 inside a1, or nullptr when a2 does not occur. An empty a2 matches at the
// start of a1.
//
// This replaces a placeholder that returned a1 plus a random offset and passed
// wide strings to a "%s" log format.
const wchar_t* xstrstr(wchar_t const* a1, wchar_t const* a2) {
    if (*a2 == 0) {
        return a1;
    }
    for (const wchar_t* start = a1; *start != 0; ++start) {
        const wchar_t* hay = start;
        const wchar_t* needle = a2;
        while (*needle != 0 && *hay == *needle) {
            ++hay;
            ++needle;
        }
        if (*needle == 0) {
            // The whole of a2 matched starting at `start`.
            return start;
        }
    }
    return nullptr;
}
// Appends the NUL-terminated wide string a2 to the wide string in a1, where a3
// is the total capacity of a1 in characters. At most a3 - 1 characters end up
// in a1, followed by a terminating 0.
//
// Returns the length the combined string would have with unlimited room
// (initial length of a1 plus length of a2), so a result of a3 or more means the
// output was truncated. If no terminator is found within the first a3
// characters of a1, nothing is written and a3 plus the length of a2 is
// returned.
//
// This replaces a placeholder that returned a random number without touching
// a1.
size_t xstrlcat(wchar_t* a1, const wchar_t* a2, unsigned int a3) {
    // Length of the existing destination text, never looking beyond a3.
    size_t dstLen = 0;
    while (dstLen < a3 && a1[dstLen] != 0) {
        ++dstLen;
    }

    size_t srcLen = 0;
    while (a2[srcLen] != 0) {
        ++srcLen;
    }

    if (dstLen == a3) {
        return a3 + srcLen;
    }

    // One slot is reserved for the terminator.
    const size_t room = a3 - dstLen - 1;
    const size_t copied = srcLen < room ? srcLen : room;
    for (size_t k = 0; k < copied; ++k) {
        a1[dstLen + k] = a2[k];
    }
    a1[dstLen + copied] = 0;
    return dstLen + srcLen;
}
|