forked from Mirrorlandia_minetest/minetest
Refactor utf8_to_wide/wide_to_utf8 functions
This commit is contained in:
parent
7ebd5da9cd
commit
5e392cf34f
@ -302,9 +302,18 @@ void TestUtilities::testAsciiPrintableHelper()
|
|||||||
|
|
||||||
void TestUtilities::testUTF8()
|
void TestUtilities::testUTF8()
|
||||||
{
|
{
|
||||||
UASSERT(wide_to_utf8(utf8_to_wide("")) == "");
|
UASSERT(utf8_to_wide("¤") == L"¤");
|
||||||
UASSERT(wide_to_utf8(utf8_to_wide("the shovel dug a crumbly node!"))
|
|
||||||
== "the shovel dug a crumbly node!");
|
UASSERT(wide_to_utf8(L"¤") == "¤");
|
||||||
|
|
||||||
|
UASSERTEQ(std::string, wide_to_utf8(utf8_to_wide("")), "");
|
||||||
|
UASSERTEQ(std::string, wide_to_utf8(utf8_to_wide("the shovel dug a crumbly node!")),
|
||||||
|
"the shovel dug a crumbly node!");
|
||||||
|
UASSERTEQ(std::string, wide_to_utf8(utf8_to_wide("-ä-")),
|
||||||
|
"-ä-");
|
||||||
|
UASSERTEQ(std::string, wide_to_utf8(utf8_to_wide("-\xF0\xA0\x80\x8B-")),
|
||||||
|
"-\xF0\xA0\x80\x8B-");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void TestUtilities::testRemoveEscapes()
|
void TestUtilities::testRemoveEscapes()
|
||||||
|
@ -50,8 +50,8 @@ static bool parseNamedColorString(const std::string &value, video::SColor &color
|
|||||||
|
|
||||||
#ifndef _WIN32
|
#ifndef _WIN32
|
||||||
|
|
||||||
bool convert(const char *to, const char *from, char *outbuf,
|
static bool convert(const char *to, const char *from, char *outbuf,
|
||||||
size_t outbuf_size, char *inbuf, size_t inbuf_size)
|
size_t *outbuf_size, char *inbuf, size_t inbuf_size)
|
||||||
{
|
{
|
||||||
iconv_t cd = iconv_open(to, from);
|
iconv_t cd = iconv_open(to, from);
|
||||||
|
|
||||||
@ -60,15 +60,14 @@ bool convert(const char *to, const char *from, char *outbuf,
|
|||||||
#else
|
#else
|
||||||
char *inbuf_ptr = inbuf;
|
char *inbuf_ptr = inbuf;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
char *outbuf_ptr = outbuf;
|
char *outbuf_ptr = outbuf;
|
||||||
|
|
||||||
size_t *inbuf_left_ptr = &inbuf_size;
|
size_t *inbuf_left_ptr = &inbuf_size;
|
||||||
size_t *outbuf_left_ptr = &outbuf_size;
|
|
||||||
|
|
||||||
|
const size_t old_outbuf_size = *outbuf_size;
|
||||||
size_t old_size = inbuf_size;
|
size_t old_size = inbuf_size;
|
||||||
while (inbuf_size > 0) {
|
while (inbuf_size > 0) {
|
||||||
iconv(cd, &inbuf_ptr, inbuf_left_ptr, &outbuf_ptr, outbuf_left_ptr);
|
iconv(cd, &inbuf_ptr, inbuf_left_ptr, &outbuf_ptr, outbuf_size);
|
||||||
if (inbuf_size == old_size) {
|
if (inbuf_size == old_size) {
|
||||||
iconv_close(cd);
|
iconv_close(cd);
|
||||||
return false;
|
return false;
|
||||||
@ -77,11 +76,12 @@ bool convert(const char *to, const char *from, char *outbuf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
iconv_close(cd);
|
iconv_close(cd);
|
||||||
|
*outbuf_size = old_outbuf_size - *outbuf_size;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __ANDROID__
|
#ifdef __ANDROID__
|
||||||
// Android need manual caring to support the full character set possible with wchar_t
|
// On Android iconv disagrees how big a wchar_t is for whatever reason
|
||||||
const char *DEFAULT_ENCODING = "UTF-32LE";
|
const char *DEFAULT_ENCODING = "UTF-32LE";
|
||||||
#else
|
#else
|
||||||
const char *DEFAULT_ENCODING = "WCHAR_T";
|
const char *DEFAULT_ENCODING = "WCHAR_T";
|
||||||
@ -89,58 +89,52 @@ const char *DEFAULT_ENCODING = "WCHAR_T";
|
|||||||
|
|
||||||
std::wstring utf8_to_wide(const std::string &input)
|
std::wstring utf8_to_wide(const std::string &input)
|
||||||
{
|
{
|
||||||
size_t inbuf_size = input.length() + 1;
|
const size_t inbuf_size = input.length();
|
||||||
// maximum possible size, every character is sizeof(wchar_t) bytes
|
// maximum possible size, every character is sizeof(wchar_t) bytes
|
||||||
size_t outbuf_size = (input.length() + 1) * sizeof(wchar_t);
|
size_t outbuf_size = input.length() * sizeof(wchar_t);
|
||||||
|
|
||||||
char *inbuf = new char[inbuf_size];
|
char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated
|
||||||
memcpy(inbuf, input.c_str(), inbuf_size);
|
memcpy(inbuf, input.c_str(), inbuf_size);
|
||||||
char *outbuf = new char[outbuf_size];
|
std::wstring out;
|
||||||
memset(outbuf, 0, outbuf_size);
|
out.resize(outbuf_size / sizeof(wchar_t));
|
||||||
|
|
||||||
#ifdef __ANDROID__
|
#ifdef __ANDROID__
|
||||||
// Android need manual caring to support the full character set possible with wchar_t
|
|
||||||
SANITY_CHECK(sizeof(wchar_t) == 4);
|
SANITY_CHECK(sizeof(wchar_t) == 4);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (!convert(DEFAULT_ENCODING, "UTF-8", outbuf, outbuf_size, inbuf, inbuf_size)) {
|
char *outbuf = reinterpret_cast<char*>(&out[0]);
|
||||||
|
if (!convert(DEFAULT_ENCODING, "UTF-8", outbuf, &outbuf_size, inbuf, inbuf_size)) {
|
||||||
infostream << "Couldn't convert UTF-8 string 0x" << hex_encode(input)
|
infostream << "Couldn't convert UTF-8 string 0x" << hex_encode(input)
|
||||||
<< " into wstring" << std::endl;
|
<< " into wstring" << std::endl;
|
||||||
delete[] inbuf;
|
delete[] inbuf;
|
||||||
delete[] outbuf;
|
|
||||||
return L"<invalid UTF-8 string>";
|
return L"<invalid UTF-8 string>";
|
||||||
}
|
}
|
||||||
std::wstring out((wchar_t *)outbuf);
|
|
||||||
|
|
||||||
delete[] inbuf;
|
delete[] inbuf;
|
||||||
delete[] outbuf;
|
|
||||||
|
|
||||||
|
out.resize(outbuf_size / sizeof(wchar_t));
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string wide_to_utf8(const std::wstring &input)
|
std::string wide_to_utf8(const std::wstring &input)
|
||||||
{
|
{
|
||||||
size_t inbuf_size = (input.length() + 1) * sizeof(wchar_t);
|
const size_t inbuf_size = input.length() * sizeof(wchar_t);
|
||||||
// maximum possible size: utf-8 encodes codepoints using 1 up to 6 bytes
|
// maximum possible size: utf-8 encodes codepoints using 1 up to 4 bytes
|
||||||
size_t outbuf_size = (input.length() + 1) * 6;
|
size_t outbuf_size = input.length() * 4;
|
||||||
|
|
||||||
char *inbuf = new char[inbuf_size];
|
char *inbuf = new char[inbuf_size]; // intentionally NOT null-terminated
|
||||||
memcpy(inbuf, input.c_str(), inbuf_size);
|
memcpy(inbuf, input.c_str(), inbuf_size);
|
||||||
char *outbuf = new char[outbuf_size];
|
std::string out;
|
||||||
memset(outbuf, 0, outbuf_size);
|
out.resize(outbuf_size);
|
||||||
|
|
||||||
if (!convert("UTF-8", DEFAULT_ENCODING, outbuf, outbuf_size, inbuf, inbuf_size)) {
|
if (!convert("UTF-8", DEFAULT_ENCODING, &out[0], &outbuf_size, inbuf, inbuf_size)) {
|
||||||
infostream << "Couldn't convert wstring 0x" << hex_encode(inbuf, inbuf_size)
|
infostream << "Couldn't convert wstring 0x" << hex_encode(inbuf, inbuf_size)
|
||||||
<< " into UTF-8 string" << std::endl;
|
<< " into UTF-8 string" << std::endl;
|
||||||
delete[] inbuf;
|
delete[] inbuf;
|
||||||
delete[] outbuf;
|
return "<invalid wide string>";
|
||||||
return "<invalid wstring>";
|
|
||||||
}
|
}
|
||||||
std::string out(outbuf);
|
|
||||||
|
|
||||||
delete[] inbuf;
|
delete[] inbuf;
|
||||||
delete[] outbuf;
|
|
||||||
|
|
||||||
|
out.resize(outbuf_size);
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -172,15 +166,12 @@ std::string wide_to_utf8(const std::wstring &input)
|
|||||||
|
|
||||||
#endif // _WIN32
|
#endif // _WIN32
|
||||||
|
|
||||||
// You must free the returned string!
|
|
||||||
// The returned string is allocated using new
|
|
||||||
wchar_t *utf8_to_wide_c(const char *str)
|
wchar_t *utf8_to_wide_c(const char *str)
|
||||||
{
|
{
|
||||||
std::wstring ret = utf8_to_wide(std::string(str));
|
std::wstring ret = utf8_to_wide(std::string(str));
|
||||||
size_t len = ret.length();
|
size_t len = ret.length();
|
||||||
wchar_t *ret_c = new wchar_t[len + 1];
|
wchar_t *ret_c = new wchar_t[len + 1];
|
||||||
memset(ret_c, 0, (len + 1) * sizeof(wchar_t));
|
memcpy(ret_c, ret.c_str(), (len + 1) * sizeof(wchar_t));
|
||||||
memcpy(ret_c, ret.c_str(), len * sizeof(wchar_t));
|
|
||||||
return ret_c;
|
return ret_c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -64,11 +64,13 @@ struct FlagDesc {
|
|||||||
u32 flag;
|
u32 flag;
|
||||||
};
|
};
|
||||||
|
|
||||||
// try not to convert between wide/utf8 encodings; this can result in data loss
|
// Try to avoid converting between wide and UTF-8 unless you need to
|
||||||
// try to only convert between them when you need to input/output stuff via Irrlicht
|
// input/output stuff via Irrlicht
|
||||||
std::wstring utf8_to_wide(const std::string &input);
|
std::wstring utf8_to_wide(const std::string &input);
|
||||||
std::string wide_to_utf8(const std::wstring &input);
|
std::string wide_to_utf8(const std::wstring &input);
|
||||||
|
|
||||||
|
// You must free the returned string!
|
||||||
|
// The returned string is allocated using new[]
|
||||||
wchar_t *utf8_to_wide_c(const char *str);
|
wchar_t *utf8_to_wide_c(const char *str);
|
||||||
|
|
||||||
// NEVER use those two functions unless you have a VERY GOOD reason to
|
// NEVER use those two functions unless you have a VERY GOOD reason to
|
||||||
|
Loading…
Reference in New Issue
Block a user