diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index 91d79493..a4008fd1 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -7,6 +7,7 @@ list(APPEND _LIBUI_SOURCES
 	common/matrix.c
 	common/shouldquit.c
 	common/userbugs.c
+	common/utf.c
 )
 
 set(_LIBUI_SOURCES ${_LIBUI_SOURCES} PARENT_SCOPE)
diff --git a/common/uipriv.h b/common/uipriv.h
index d6b54e89..f22a08a6 100644
--- a/common/uipriv.h
+++ b/common/uipriv.h
@@ -1,4 +1,7 @@
 // 6 april 2015
+// TODO can extern "C"s nest?
+#include "utf.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
diff --git a/windows/utf16.cpp b/windows/utf16.cpp
index 98954d0a..6271fff7 100644
--- a/windows/utf16.cpp
+++ b/windows/utf16.cpp
@@ -3,48 +3,42 @@
 
 // see http://stackoverflow.com/a/29556509/3408572
 
-#define MBTWC(str, wstr, bufsiz) MultiByteToWideChar(CP_UTF8, 0, str, -1, wstr, bufsiz)
-
 WCHAR *toUTF16(const char *str)
 {
 	WCHAR *wstr;
-	int n;
+	WCHAR *wp;
+	size_t n;
+	uint32_t rune;
 
 	if (*str == '\0')			// empty string
 		return emptyUTF16();
-	n = MBTWC(str, NULL, 0);
-	if (n == 0) {
-		logLastError(L"error figuring out number of characters to convert to");
-		return emptyUTF16();
-	}
-	wstr = (WCHAR *) uiAlloc(n * sizeof (WCHAR), "WCHAR[]");
-	if (MBTWC(str, wstr, n) != n) {
-		logLastError(L"error converting from UTF-8 to UTF-16");
-		// and return an empty string
-		*wstr = L'\0';
+	n = utf8UTF16Count(str, 0);		// number of UTF-16 code units the output needs
+	wstr = (WCHAR *) uiAlloc((n + 1) * sizeof (WCHAR), "WCHAR[]");		// +1 for the terminator; NOTE(review): never written explicitly, presumably uiAlloc zero-fills - confirm
+	wp = wstr;
+	while (*str) {
+		str = utf8DecodeRune(str, 0, &rune);
+		n = utf16EncodeRune(rune, wp);		// n is reused: now the number of code units just written
+		wp += n;
 	}
 	return wstr;
 }
 
-#define WCTMB(wstr, str, bufsiz) WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, bufsiz, NULL, NULL)
-
 char *toUTF8(const WCHAR *wstr)
 {
 	char *str;
-	int n;
+	char *sp;
+	size_t n;
+	uint32_t rune;
 
 	if (*wstr == L'\0')			// empty string
 		return emptyUTF8();
-	n = WCTMB(wstr, NULL, 0);
-	if (n == 0) {
-		logLastError(L"error figuring out number of characters to convert to");
-		return emptyUTF8();
-	}
-	str = (char *) uiAlloc(n * sizeof (char), "char[]");
-	if (WCTMB(wstr, str, n) != n) {
-		logLastError(L"error converting from UTF-16 to UTF-8");
-		// and return an empty string
-		*str = '\0';
+	n = utf16UTF8Count(wstr, 0);		// UTF-8 byte count, not rune count: one rune can encode to several bytes
+	str = (char *) uiAlloc((n + 1) * sizeof (char), "char[]");		// +1 for the terminator; NOTE(review): never written explicitly, presumably uiAlloc zero-fills - confirm
+	sp = str;
+	while (*wstr) {
+		wstr = utf16DecodeRune(wstr, &rune);
+		n = utf8EncodeRune(rune, sp);		// n is reused: now the number of bytes just written
+		sp += n;
 	}
 	return str;
 }
@@ -92,6 +86,8 @@ WCHAR *vstrf(const WCHAR *format, va_list ap)
 	return buf;
 }
 
+// TODO merge the following two with the toUTF*()s?
+
 // Let's shove these utility routines here too.
 // Prerequisite: lfonly is UTF-8.
 char *LFtoCRLF(const char *lfonly)