#include "str_conv.h"
#include "global_func.h"
// Standard headers for the facilities used in this file: std::string,
// std::vector, std::stringstream, std::wstring_convert, memcpy/strlen, rand.
#include <codecvt>
#include <cstdlib>
#include <cstring>
#include <locale>
#include <sstream>
#include <string>
#include <vector>
#include "log.h"

//-------------------------------------------------------------------------------------------------
// Decodes a UTF-8 byte string into a wide string.
//
// Lead bytes announcing 1 to 6 byte sequences are accepted (the historical
// UTF-8 range). Continuation bytes are not validated; only their low six bits
// are used. Each decoded value is narrowed to wchar_t, so on platforms with a
// 16-bit wchar_t anything above 0xFFFF is truncated.
//
// A byte that cannot start a sequence (a stray continuation byte, 0xFE, 0xFF)
// is skipped. A sequence cut short by the end of the string ends the decoding.
std::wstring UTF8toUnicode(const std::string& s) {
    std::wstring ws;
    const std::string::size_type size = s.size();
    ws.reserve(size);

    std::string::size_type i = 0;
    while (i < size) {
        const unsigned char lead = static_cast<unsigned char>(s[i]);

        // Sequence length and payload bits carried by the lead byte.
        std::string::size_type length = 0;
        char32_t cp = 0;
        if ((lead & 0x80) == 0) {
            length = 1;
            cp = lead;
        } else if ((lead & 0xE0) == 0xC0) {
            length = 2;
            cp = lead & 0x1Fu;
        } else if ((lead & 0xF0) == 0xE0) {
            length = 3;
            cp = lead & 0x0Fu;
        } else if ((lead & 0xF8) == 0xF0) {
            length = 4;
            cp = lead & 0x07u;
        } else if ((lead & 0xFC) == 0xF8) {
            length = 5;
            cp = lead & 0x03u;
        } else if ((lead & 0xFE) == 0xFC) {
            length = 6;
            cp = lead & 0x01u;
        } else {
            // Not a valid lead byte: always advance so the loop terminates.
            ++i;
            continue;
        }

        if (length > size - i) {
            break;  // incomplete trailing sequence: never read past the string
        }

        // Every continuation byte contributes six more bits.
        for (std::string::size_type k = 1; k < length; ++k) {
            const unsigned char next = static_cast<unsigned char>(s[i + k]);
            cp = static_cast<char32_t>((cp << 6) | (next & 0x3Fu));
        }

        ws += static_cast<wchar_t>(cp);
        i += length;
    }
    return ws;
}

//-------------------------------------------------------------------------------------------------
//002690C8
// Encodes the zero-terminated wide string pWBuf1 as UTF-8 into pOutput0.
//
// dOutputLen2 is the capacity of pOutput0 in bytes. The output is always
// zero-terminated when the capacity is non-zero. A character is only written
// while MORE than (encoded length + 1) bytes remain; the "+ 1" reserves room
// for the trailing 0, and the strict comparison is kept from the original
// routine, so the last byte of the buffer is never used.
//
// Returns true when the output had to be truncated, false otherwise (also
// false when the capacity is below 2 and nothing could be converted).
// Values above 0xFFFF, possible only where wchar_t is wider than 16 bits, are
// emitted as four-byte sequences.
bool UnicodeToUTF8(char* pOutput0, const wchar_t* pWBuf1, uint dOutputLen2) {
    if (dOutputLen2 == 0) {
        return false;  // not even the terminator fits
    }

    bool truncated = false;
    while (dOutputLen2 >= 2) {
        const char32_t cp = static_cast<char32_t>(*pWBuf1);
        if (cp == 0) {
            break;
        }

        char encoded[4];
        uint count = 0;
        if (cp <= 0x7F) {
            encoded[0] = static_cast<char>(cp);
            count = 1;
        } else if (cp <= 0x7FF) {
            encoded[0] = static_cast<char>(0xC0 | (cp >> 6));
            encoded[1] = static_cast<char>(0x80 | (cp & 0x3F));
            count = 2;
        } else if (cp <= 0xFFFF) {
            encoded[0] = static_cast<char>(0xE0 | (cp >> 12));
            encoded[1] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
            encoded[2] = static_cast<char>(0x80 | (cp & 0x3F));
            count = 3;
        } else {
            encoded[0] = static_cast<char>(0xF0 | ((cp >> 18) & 0x07));
            encoded[1] = static_cast<char>(0x80 | ((cp >> 12) & 0x3F));
            encoded[2] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
            encoded[3] = static_cast<char>(0x80 | (cp & 0x3F));
            count = 4;
        }

        if (dOutputLen2 <= count + 1) {
            truncated = true;
            break;
        }

        for (uint k = 0; k < count; ++k) {
            pOutput0[k] = encoded[k];
        }
        pOutput0 += count;
        dOutputLen2 -= count;
        ++pWBuf1;
    }

    *pOutput0 = '\0';
    return truncated;
}

//-------------------------------------------------------------------------------------------------
//00268EDC
// Port note: the decompiled form of this function differs considerably from
// the original assembly; the earlier revision was reported as unit-tested.
//
// Decodes the zero-terminated UTF-8 string pUtfStr1 into pWOut0.
//
// dOutputLen2 is the capacity of pWOut0 in wchar_t units including the
// terminator; at most dOutputLen2 - 1 characters are written, followed by 0.
// Decoding stops at the terminator, at a byte that cannot start a sequence
// (0x80..0xBF, 0xC0, 0xC1, 0xF5..0xFF) or at a malformed/incomplete sequence.
//
// Return value (kept exactly as in the previous revision): 0 while only
// single-byte characters were met; 2 once a two- or three-byte lead byte was
// met, or when a four-byte sequence was cut short by the string terminator.
//
// Bytes are handled as unsigned so the result does not depend on whether
// plain char is signed. Decoded values are narrowed to wchar_t.
int UTF8ToUnicode(wchar_t* pWOut0, const char* pUtfStr1, uint dOutputLen2) {
    if (dOutputLen2 == 0) {
        return 0;  // no room for the terminator
    }

    const unsigned char* src = reinterpret_cast<const unsigned char*>(pUtfStr1);
    const auto isContinuation = [](unsigned char b) { return (b & 0xC0) == 0x80; };

    int dret = 0;
    for (; dOutputLen2 >= 2; --dOutputLen2) {
        const unsigned char lead = src[0];
        if (lead == 0 || isContinuation(lead)) {
            break;
        }
        if (lead >= 0xF5 || lead == 0xC0 || lead == 0xC1) {
            break;  // bytes that can never start a valid sequence
        }

        char32_t cp = 0;
        if (lead < 0x80) {
            cp = lead;
            src += 1;
        } else if (lead < 0xE0) {
            // Two-byte sequence, lead 0xC2..0xDF.
            dret = 2;
            if (!isContinuation(src[1])) {
                break;  // also covers a premature terminator
            }
            // The five lead bits are the HIGH part of the value.
            cp = ((lead & 0x1Fu) << 6) | (src[1] & 0x3Fu);
            src += 2;
        } else if (lead < 0xF0) {
            // Three-byte sequence, lead 0xE0..0xEF.
            dret = 2;
            if (src[1] == 0 || src[2] == 0) {
                break;
            }
            if (!isContinuation(src[1]) || !isContinuation(src[2])) {
                break;
            }
            cp = ((lead & 0x0Fu) << 12) | ((src[1] & 0x3Fu) << 6) | (src[2] & 0x3Fu);
            src += 3;
        } else {
            // Four-byte sequence, lead 0xF0..0xF4.
            if (src[1] == 0 || src[2] == 0 || src[3] == 0) {
                dret = 2;
                break;
            }
            if (!isContinuation(src[1]) || !isContinuation(src[2]) || !isContinuation(src[3])) {
                break;
            }
            cp = ((lead & 0x07u) << 18) | ((src[1] & 0x3Fu) << 12) |
                 ((src[2] & 0x3Fu) << 6) | (src[3] & 0x3Fu);
            src += 4;
        }

        *pWOut0++ = static_cast<wchar_t>(cp);
    }

    *pWOut0 = 0;
    return dret;
}

//-------------------------------------------------------------------------------------------------
//00268E6E
// Port note: the original program has a bug here. Its loop bound allowed up
// to dLen2 + 1 characters plus the terminator to be written.
//
// Converts the zero-terminated wide string pWBuf1 to single-byte text.
// Characters >= 0x100 are replaced by c3; all others are narrowed to char.
// dLen2 is treated as the capacity of pOut0 in bytes including the
// terminator (the way UTF8ToAscii uses it): at most dLen2 - 1 characters are
// written, followed by 0. Nothing is written when dLen2 is 0.
void UnicodeToAscii(char* pOut0, wchar_t const* pWBuf1, uint dLen2, char c3) {
    if (dLen2 == 0) {
        return;
    }

    uint i = 0;
    for (; i + 1 < dLen2 && pWBuf1[i] != 0; ++i) {
        const wchar_t wc = pWBuf1[i];
        pOut0[i] = (wc >= 0x100) ? c3 : static_cast<char>(wc);
    }
    pOut0[i] = '\0';
}

//-------------------------------------------------------------------------------------------------
//00268E9E
// Converts UTF-8 text to single-byte text by decoding it with UTF8ToUnicode
// and narrowing the result with UnicodeToAscii (c3 is the replacement
// character). dLen2 is passed as the capacity to both steps.
// Returns pOutAscii0; the buffer is left untouched when dLen2 is 0.
char* UTF8ToAscii(char* pOutAscii0, char const* pUtf81, uint dLen2, char c3) {
    if (dLen2 == 0) {
        return pOutAscii0;
    }

    // Temporary wide buffer, released automatically on every exit path.
    std::vector<wchar_t> wide(dLen2);
    UTF8ToUnicode(wide.data(), pUtf81, dLen2);
    //00268EC0
    UnicodeToAscii(pOutAscii0, wide.data(), dLen2, c3);
    return pOutAscii0;
}

//-------------------------------------------------------------------------------------------------
//00268E4C
// Port note: the earlier revision was reported as unit-tested. The port first
// walked p_c_r1 with p_c_r1++, whereas the original code indexes it with a
// counter; the indexed form is used here.
//
// Widens the zero-terminated byte string p_c_r1 into p_wc_r0, one wchar_t per
// byte. ud_r2 is the capacity of p_wc_r0 in wchar_t units including the
// terminator. Bytes are widened as unsigned values, so bytes >= 0x80 map to
// 0x80..0xFF regardless of the signedness of plain char.
void AsciiToUnicode(wchar_t* p_wc_r0, char const* p_c_r1, uint ud_r2) {
    LOGI("AsciiToUnicode entry");
    if (ud_r2 != 0) {
        uint i = 0;
        for (; ud_r2 >= 2 && p_c_r1[i] != '\0'; --ud_r2, ++i) {
            p_wc_r0[i] = static_cast<unsigned char>(p_c_r1[i]);
        }
        p_wc_r0[i] = 0;
    }
    LOGI("AsciiToUnicode end");
}

//-------------------------------------------------------------------------------------------------
// Converts a UTF-8 byte string to UTF-16 through std::wstring_convert.
// The converter is built without an error string, so from_bytes reports
// malformed input by throwing std::range_error.
std::u16string s2ws(const std::string& str) {
    std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> converter;
    return converter.from_bytes(str);
}

//-------------------------------------------------------------------------------------------------
// Converts a char16_t string to UTF-8 through std::wstring_convert with
// std::codecvt_utf8<char16_t>. to_bytes throws std::range_error when the
// conversion fails.
std::string ws2s(const std::u16string& wstr) {
    using Utf8Codec = std::codecvt_utf8<char16_t>;
    std::wstring_convert<Utf8Codec, char16_t> converter;
    return converter.to_bytes(wstr);
}

//-------------------------------------------------------------------------------------------------
// Returns the number of char16_t units before the terminating 0,
// or -1 when strarg is null.
int strlen16(const char16_t* strarg) {
    if (strarg == nullptr) {
        return -1;
    }

    const char16_t* end = strarg;
    while (*end != 0) {
        ++end;
    }
    return static_cast<int>(end - strarg);
}
//-------------------------------------------------------------------------------------------------
// Returns a copy of str without leading and trailing space characters.
// Only ' ' is removed (not tabs or newlines). A string made only of spaces
// yields an empty string.
std::string trim(const std::string& str) {
    const auto first = str.find_first_not_of(' ');
    if (first == std::string::npos) {
        return std::string();
    }
    const auto last = str.find_last_not_of(' ');
    return str.substr(first, last - first + 1);
}

//-------------------------------------------------------------------------------------------------
// Splits s at every delim and returns the space-trimmed pieces in order.
// Pieces are read with std::getline, so an empty input yields no pieces and a
// trailing delimiter does not add an empty piece at the end.
std::vector<std::string> string_split(std::string s, char delim) {
    std::vector<std::string> result;
    std::stringstream stream(s);
    std::string item;
    while (std::getline(stream, item, delim)) {
        result.push_back(trim(item));
    }
    return result;
}

//-------------------------------------------------------------------------------------------------
// Replaces, in place, every occurrence of toSearch in data by replaceStr.
// The search resumes after the inserted text, so occurrences created by the
// replacement itself are not replaced again. An empty toSearch is a no-op
// (it would otherwise match at every position and never terminate).
void string_replace(std::string& data, std::string toSearch, std::string replaceStr) {
    if (toSearch.empty()) {
        return;
    }
    for (auto pos = data.find(toSearch); pos != std::string::npos;
         pos = data.find(toSearch, pos + replaceStr.size())) {
        data.replace(pos, toSearch.size(), replaceStr);
    }
}

//-------------------------------------------------------------------------------------------------
// char16_t counterpart of string_replace, with the same rules: in-place,
// non-recursive, and an empty toSearch is a no-op.
void u16string_replace(std::u16string& data, std::u16string toSearch, std::u16string replaceStr) {
    if (toSearch.empty()) {
        return;
    }
    for (auto pos = data.find(toSearch); pos != std::u16string::npos;
         pos = data.find(toSearch, pos + replaceStr.size())) {
        data.replace(pos, toSearch.size(), replaceStr);
    }
}

//-------------------------------------------------------------------------------------------------
//002691E4
// Port note: the earlier revision was reported as unit-tested.
//
// Returns a newly allocated (new[]) copy of the zero-terminated wide string
// pStr, including its terminator, or nullptr when pStr is null.
// The length is measured in wchar_t units, so the function does not depend on
// the size of wchar_t, and exactly length + 1 units are read from the source.
wchar_t* CreateStringCopy(const wchar_t* pStr) {
    if (pStr == nullptr) {
        return nullptr;
    }
    const auto count = std::char_traits<wchar_t>::length(pStr) + 1;  // + terminator
    wchar_t* pBuf = new wchar_t[count];
    std::char_traits<wchar_t>::copy(pBuf, pStr, count);
    return pBuf;
}

//-------------------------------------------------------------------------------------------------
//00269218
// Port note: the earlier revision was reported as unit-tested.
//
// Returns a newly allocated (new[]) copy of the zero-terminated string pStr,
// including its terminator, or nullptr when pStr is null.
char* CreateStringCopy(const char* pStr) {
    if (pStr == nullptr) {
        return nullptr;
    }
    const auto count = std::char_traits<char>::length(pStr) + 1;  // + terminator
    char* pBuf = new char[count];
    std::char_traits<char>::copy(pBuf, pStr, count);
    return pBuf;
}
//------------------------------------------------------------------------------------------------------- //00268C2C //^_^ size_t xstrlen(const wchar_t* a1) { wchar_t v2; const wchar_t* v1 = a1 - 1; do { v2 = v1[1]; ++v1; } while (v2); return v1 - a1; } //------------------------------------------------------------------------------------------------------- wchar_t* xstrchr(const wchar_t* pbuf, wchar_t c) { const wchar_t* r3 = pbuf; // loc_268DFE while (*r3) { r3++; } //00268E06 int r12 = r3 - pbuf; int ir3 = 0; wchar_t* r0 = reinterpret_cast(const_cast(pbuf)); while (1) { if (ir3 >= r12) { return nullptr; } //00268E18 ir3++; if (*r0 == c) { return r0; } r0++; } return nullptr; } //------------------------------------------------------------------------------------------------------- wchar_t* xstrrchr(const wchar_t* pbuf, wchar_t c) { const wchar_t* r2 = pbuf; // loc_268E2A while (*r2) { r2++; } wchar_t* r0 = static_cast(const_cast(pbuf)); int ir2 = r2 - pbuf; while (ir2 > 0) { // loc_268E38 ir2--; if (*r0 == c) { return r0; } r0++; } return nullptr; } //------------------------------------------------------------------------------------------------------- //0014D150 char* strlwr(char* pBuf) { for (int i = 0; pBuf[i] != 0; ++i) { if ((pBuf[i] - 65) <= 0x19u) pBuf[i] = pBuf[i] + 32; } return pBuf; } //------------------------------------------------------------------------------------------------------- string Unicode2String(char16_t const* pWBuf) { std::u16string u16str(pWBuf); return ws2s(u16str); } //------------------------------------------------------------------------------------------------------- //00268CD2 ^_^ 经过单元测试,功能正常 int xstrcmp(const wchar_t* a1, const wchar_t* a2) { wchar_t v2; wchar_t v3; while (1) { v2 = *a2; v3 = *a1; if (!*a2) break; if (v3 < v2) return -1; if (v3 > v2) return 1; ++a1; ++a2; } if (*a1) v3 = 1; return v3; } //------------------------------------------------------------------------------------------------------- //00268C3C ^_^ 
// Port note: the earlier revision of xstrlcpy was reported as unit-tested.
//
// Bounded wide-string copy with the return convention of BSD strlcpy.
// Copies a2 into a1, writing at most a3 wchar_t units including the
// terminator (nothing is written when a3 is 0). Returns the length of a2 in
// wchar_t units, so a result >= a3 means the copy was truncated.
// The length is computed with pointer arithmetic and is therefore independent
// of sizeof(wchar_t).
ulong xstrlcpy(wchar_t* a1, const wchar_t* a2, unsigned int a3) {
    const wchar_t* src = a2;
    if (a3 != 0) {
        for (unsigned int room = a3 - 1; room != 0; --room) {
            const wchar_t unit = *src++;
            *a1++ = unit;
            if (unit == 0) {
                return static_cast<ulong>(src - a2 - 1);
            }
        }
        *a1 = 0;  // out of room: terminate the truncated copy
    }

    // Walk the rest of the source to report its full length.
    while (*src++ != 0) {
    }
    return static_cast<ulong>(src - a2 - 1);
}

//-------------------------------------------------------------------------------------------------------
//001199C2 -_-
// Port note: the earlier revision was reported as unit-tested.
//
// Byte-string counterpart of xstrlcpy: copies a2 into a1, writing at most a3
// bytes including the terminator (nothing is written when a3 is 0), and
// returns the length of a2. a3 is expected to be non-negative; a negative
// value is not treated as a bound.
size_t strlcpy(unsigned char* a1, unsigned char* a2, int a3) {
    unsigned char* src = a2;
    if (a3 != 0) {
        for (int room = a3 - 1; room != 0; --room) {
            const unsigned char byte = *src++;
            *a1++ = byte;
            if (byte == 0) {
                return static_cast<size_t>(src - a2 - 1);
            }
        }
        *a1 = 0;  // out of room: terminate the truncated copy
    }

    // Walk the rest of the source to report its full length.
    while (*src++ != 0) {
    }
    return static_cast<size_t>(src - a2 - 1);
}

//-------------------------------------------------------------------------------------------------------
// Returns a pointer to the first occurrence of the zero-terminated wide
// string a2 inside a1, or nullptr when there is none (or when either argument
// is null). An empty a2 matches at the start of a1.
// NOTE(review): the previous body was a placeholder that logged its arguments
// and returned a1 plus a random offset; the semantics here follow strstr —
// confirm against the original routine.
const wchar_t* xstrstr(wchar_t const* a1, wchar_t const* a2) {
    if (a1 == nullptr || a2 == nullptr) {
        return nullptr;
    }
    if (*a2 == 0) {
        return a1;
    }

    for (const wchar_t* start = a1; *start != 0; ++start) {
        const wchar_t* hay = start;
        const wchar_t* needle = a2;
        while (*needle != 0 && *hay == *needle) {
            ++hay;
            ++needle;
        }
        if (*needle == 0) {
            return start;  // the whole of a2 matched at this position
        }
    }
    return nullptr;
}

//-------------------------------------------------------------------------------------------------------
// Appends a2 to the zero-terminated wide string held in a1, where a3 is the
// total capacity of a1 in wchar_t units including the terminator. The result
// is always terminated when a1 already contained a terminator within a3.
// Returns the length the concatenation would have without truncation (initial
// length of a1, capped at a3, plus the length of a2), so a result >= a3 means
// the output was truncated.
// NOTE(review): the previous body was a placeholder returning a random value;
// the semantics here follow BSD strlcat — confirm against the original
// routine.
size_t xstrlcat(wchar_t* a1, const wchar_t* a2, unsigned int a3) {
    size_t dstLen = 0;
    while (dstLen < a3 && a1[dstLen] != 0) {
        ++dstLen;
    }

    size_t srcLen = 0;
    while (a2[srcLen] != 0) {
        ++srcLen;
    }

    if (dstLen == a3) {
        return dstLen + srcLen;  // no terminator inside the buffer: append nothing
    }

    const size_t room = a3 - dstLen - 1;  // units available before the terminator
    const size_t count = (srcLen < room) ? srcLen : room;
    for (size_t i = 0; i < count; ++i) {
        a1[dstLen + i] = a2[i];
    }
    a1[dstLen + count] = 0;
    return dstLen + srcLen;
}