Rework iconv encoding detection

WCHAR_T doesn't seem as portable as we thought, so it's just easier
to detect the right encoding using macros at this point.
This commit is contained in:
sfan5 2024-03-16 15:10:11 +01:00
parent d1a1aed23e
commit 008d6be900
2 changed files with 35 additions and 32 deletions

@ -24,25 +24,36 @@ with this program; if not, write to the Free Software Foundation, Inc.,
#include "ieee_float.h"
#include "config.h"
// Byte order macros (__BYTE_ORDER, __LITTLE_ENDIAN, __BIG_ENDIAN).
// Everything in this block is skipped unless the build configuration sets
// HAVE_ENDIAN_H to a non-zero value.
#if HAVE_ENDIAN_H
#ifdef _WIN32
// No endian header is included on Windows; the macros are defined by hand so
// that __BYTE_ORDER compares equal to __LITTLE_ENDIAN (both 0).
#define __BYTE_ORDER 0
#define __LITTLE_ENDIAN 0
#define __BIG_ENDIAN 1
#elif defined(__MACH__) && defined(__APPLE__)
// Apple platforms: the header lives under machine/.
#include <machine/endian.h>
#elif defined(__FreeBSD__) || defined(__DragonFly__)
// FreeBSD and DragonFly: the header lives under sys/.
#include <sys/endian.h>
#else
// Every other platform is expected to ship a plain <endian.h>.
#include <endian.h>
#endif
#endif
#include <cstring> // for memcpy
#include <cassert>
#include <iostream>
#include <string>
#include <string_view>
/* make sure BYTE_ORDER macros are available */
#ifdef _WIN32
#define BYTE_ORDER 1234
#elif defined(__MACH__) && defined(__APPLE__)
#include <machine/endian.h>
#elif defined(__FreeBSD__) || defined(__DragonFly__)
#include <sys/endian.h>
#elif HAVE_ENDIAN_H
#include <endian.h>
#else
#error "Can't detect endian (missing header)"
#endif
#ifndef LITTLE_ENDIAN
#define LITTLE_ENDIAN 1234
#endif
#ifndef BIG_ENDIAN
#define BIG_ENDIAN 4321
#endif
#if !defined(BYTE_ORDER) && defined(_BYTE_ORDER)
#define BYTE_ORDER _BYTE_ORDER
#elif !defined(BYTE_ORDER) && defined(__BYTE_ORDER)
#define BYTE_ORDER __BYTE_ORDER
#endif
// Scaling constant with the value 1000.0f.
// NOTE(review): presumably the float <-> fixed-point integer scale used by the
// serialization helpers in this header; its users are outside this view -- confirm.
#define FIXEDPOINT_FACTOR 1000.0f
// 0x7FFFFFFF / 1000.0f is not serializable.

@ -18,7 +18,7 @@ with this program; if not, write to the Free Software Foundation, Inc.,
*/
#include "string.h"
#include "pointer.h"
#include "serialize.h" // BYTE_ORDER
#include "numeric.h"
#include "log.h"
@ -67,20 +67,16 @@ static bool convert(const char *to, const char *from, char *outbuf,
return true;
}
// Name of the wide-string encoding handed to convert() (used as the `to`
// argument when converting from "UTF-8"). Chosen per platform at preprocessing
// time.
#ifdef __ANDROID__
// On Android, iconv disagrees about how big a wchar_t is (reason unknown),
// so the encoding is hard-coded instead of using "WCHAR_T".
const char *DEFAULT_ENCODING = "UTF-32LE";
#elif defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__)
// NetBSD does not allow "WCHAR_T" as a charset input to iconv.
// The same workaround is applied on OpenBSD and FreeBSD: pick the UTF-32
// variant that matches the host byte order reported by <sys/endian.h>.
#include <sys/endian.h>
#if BYTE_ORDER == BIG_ENDIAN
const char *DEFAULT_ENCODING = "UTF-32BE";
#else
const char *DEFAULT_ENCODING = "UTF-32LE";
#endif
#else
// Everywhere else the "WCHAR_T" encoding name is passed through as-is.
const char *DEFAULT_ENCODING = "WCHAR_T";
#endif
// Encoding name for wide strings, derived from the platform's wchar_t:
// a 2-byte wchar_t maps to UTF-16 and a 4-byte wchar_t to UTF-32, with the
// BE/LE suffix taken from BYTE_ORDER. Any other wchar_t size is rejected at
// compile time.
static_assert(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4, "Unexpected wide char size");
constexpr const char *DEFAULT_ENCODING =
	(sizeof(wchar_t) == 2)
		? ((BYTE_ORDER == BIG_ENDIAN) ? "UTF-16BE" : "UTF-16LE")
		: ((BYTE_ORDER == BIG_ENDIAN) ? "UTF-32BE" : "UTF-32LE");
std::wstring utf8_to_wide(std::string_view input)
{
@ -93,10 +89,6 @@ std::wstring utf8_to_wide(std::string_view input)
std::wstring out;
out.resize(outbuf_size / sizeof(wchar_t));
#if defined(__ANDROID__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__FreeBSD__)
static_assert(sizeof(wchar_t) == 4, "Unexpected wide char size");
#endif
char *outbuf = reinterpret_cast<char*>(&out[0]);
if (!convert(DEFAULT_ENCODING, "UTF-8", outbuf, &outbuf_size, inbuf, inbuf_size)) {
infostream << "Couldn't convert UTF-8 string 0x" << hex_encode(input)